{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import lightgbm as lgbm\n",
    "from lightgbm.sklearn import LGBMClassifier\n",
    "from sklearn.model_selection import GridSearchCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>msno</th>\n",
       "      <th>song_id</th>\n",
       "      <th>artist_name</th>\n",
       "      <th>composer</th>\n",
       "      <th>id</th>\n",
       "      <th>source_system_tab</th>\n",
       "      <th>source_screen_name</th>\n",
       "      <th>source_type</th>\n",
       "      <th>target</th>\n",
       "      <th>type</th>\n",
       "      <th>bd</th>\n",
       "      <th>city</th>\n",
       "      <th>registered_via</th>\n",
       "      <th>song_length</th>\n",
       "      <th>genre_ids</th>\n",
       "      <th>language</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>145843</td>\n",
       "      <td>100728</td>\n",
       "      <td>104218</td>\n",
       "      <td>127622</td>\n",
       "      <td>-1</td>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "      <td>34</td>\n",
       "      <td>1.0</td>\n",
       "      <td>train</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>206471.0</td>\n",
       "      <td>359.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>145843</td>\n",
       "      <td>42228</td>\n",
       "      <td>108460</td>\n",
       "      <td>108460</td>\n",
       "      <td>-1</td>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "      <td>34</td>\n",
       "      <td>1.0</td>\n",
       "      <td>train</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>187802.0</td>\n",
       "      <td>1011.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>145843</td>\n",
       "      <td>38308</td>\n",
       "      <td>130060</td>\n",
       "      <td>282453</td>\n",
       "      <td>-1</td>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "      <td>34</td>\n",
       "      <td>1.0</td>\n",
       "      <td>train</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>247803.0</td>\n",
       "      <td>1259.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>145843</td>\n",
       "      <td>358058</td>\n",
       "      <td>295932</td>\n",
       "      <td>295932</td>\n",
       "      <td>-1</td>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "      <td>34</td>\n",
       "      <td>1.0</td>\n",
       "      <td>train</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>181115.0</td>\n",
       "      <td>1011.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>145843</td>\n",
       "      <td>328100</td>\n",
       "      <td>193751</td>\n",
       "      <td>193751</td>\n",
       "      <td>-1</td>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "      <td>34</td>\n",
       "      <td>1.0</td>\n",
       "      <td>train</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>200713.0</td>\n",
       "      <td>465.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0    msno  song_id  artist_name  composer  id  source_system_tab  \\\n",
       "0           0  145843   100728       104218    127622  -1                 25   \n",
       "1           1  145843    42228       108460    108460  -1                 25   \n",
       "2           2  145843    38308       130060    282453  -1                 25   \n",
       "3           3  145843   358058       295932    295932  -1                 25   \n",
       "4           4  145843   328100       193751    193751  -1                 25   \n",
       "\n",
       "   source_screen_name  source_type  target   type  bd  city  registered_via  \\\n",
       "0                   7           34     1.0  train NaN     1               7   \n",
       "1                   7           34     1.0  train NaN     1               7   \n",
       "2                   7           34     1.0  train NaN     1               7   \n",
       "3                   7           34     1.0  train NaN     1               7   \n",
       "4                   7           34     1.0  train NaN     1               7   \n",
       "\n",
       "   song_length  genre_ids  language  \n",
       "0     206471.0      359.0      52.0  \n",
       "1     187802.0     1011.0      52.0  \n",
       "2     247803.0     1259.0      52.0  \n",
       "3     181115.0     1011.0      52.0  \n",
       "4     200713.0      465.0      52.0  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path = 'Data/'\n",
    "data = pd.read_csv(path+'data_label_all.csv')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=data.drop(['Unnamed: 0'],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "train=data[data['type']=='train']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>msno</th>\n",
       "      <th>song_id</th>\n",
       "      <th>artist_name</th>\n",
       "      <th>composer</th>\n",
       "      <th>source_system_tab</th>\n",
       "      <th>source_screen_name</th>\n",
       "      <th>source_type</th>\n",
       "      <th>bd</th>\n",
       "      <th>city</th>\n",
       "      <th>registered_via</th>\n",
       "      <th>song_length</th>\n",
       "      <th>genre_ids</th>\n",
       "      <th>language</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>145843</td>\n",
       "      <td>100728</td>\n",
       "      <td>104218</td>\n",
       "      <td>127622</td>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "      <td>34</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>206471.0</td>\n",
       "      <td>359.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>145843</td>\n",
       "      <td>42228</td>\n",
       "      <td>108460</td>\n",
       "      <td>108460</td>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "      <td>34</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>187802.0</td>\n",
       "      <td>1011.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>145843</td>\n",
       "      <td>38308</td>\n",
       "      <td>130060</td>\n",
       "      <td>282453</td>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "      <td>34</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>247803.0</td>\n",
       "      <td>1259.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>145843</td>\n",
       "      <td>358058</td>\n",
       "      <td>295932</td>\n",
       "      <td>295932</td>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "      <td>34</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>181115.0</td>\n",
       "      <td>1011.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>145843</td>\n",
       "      <td>328100</td>\n",
       "      <td>193751</td>\n",
       "      <td>193751</td>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "      <td>34</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>200713.0</td>\n",
       "      <td>465.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     msno  song_id  artist_name  composer  source_system_tab  \\\n",
       "0  145843   100728       104218    127622                 25   \n",
       "1  145843    42228       108460    108460                 25   \n",
       "2  145843    38308       130060    282453                 25   \n",
       "3  145843   358058       295932    295932                 25   \n",
       "4  145843   328100       193751    193751                 25   \n",
       "\n",
       "   source_screen_name  source_type  bd  city  registered_via  song_length  \\\n",
       "0                   7           34 NaN     1               7     206471.0   \n",
       "1                   7           34 NaN     1               7     187802.0   \n",
       "2                   7           34 NaN     1               7     247803.0   \n",
       "3                   7           34 NaN     1               7     181115.0   \n",
       "4                   7           34 NaN     1               7     200713.0   \n",
       "\n",
       "   genre_ids  language  \n",
       "0      359.0      52.0  \n",
       "1     1011.0      52.0  \n",
       "2     1259.0      52.0  \n",
       "3     1011.0      52.0  \n",
       "4      465.0      52.0  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train=train.drop(['id','type'],axis=1)\n",
    "x_train=train.drop(['target'],axis=1)\n",
    "x_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train=train['target']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "LightGBM的主要的超参包括：\n",
    "1. 树的数目n_estimators 和 学习率 learning_rate\n",
    "2. 树的最大深度max_depth 和 树的最大叶子节点数目num_leaves（注意：XGBoost只有max_depth，LightGBM采用叶子优先的方式生成树，num_leaves很重要，设置成比 2^max_depth 小）\n",
    "3. 叶子结点的最小样本数:min_data_in_leaf(min_data, min_child_samples)\n",
    "4. 每棵树的列采样比例：feature_fraction/colsample_bytree\n",
    "5. 每棵树的行采样比例：bagging_fraction （需同时设置bagging_freq=1）/subsample\n",
    "6. 正则化参数lambda_l1(reg_alpha), lambda_l2(reg_lambda)\n",
    "\n",
    "7. 两个非模型复杂度参数，但会影响模型速度和精度。可根据特征取值范围和样本数目修改这两个参数\n",
    "1）特征的最大bin数目max_bin：默认255；\n",
    "2）用来建立直方图的样本数目subsample_for_bin：默认200000。\n",
    "\n",
    "对n_estimators，用LightGBM内嵌的cv函数调优，因为同XGBoost一样，LightGBM学习的过程内嵌了cv，速度极快。\n",
    "其他参数用GridSearchCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=30)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. n_estimators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "MAX_ROUNDS = 10000\n",
    "def get_n_estimators(params, x_train, y_train, early_stopping_rounds=10):\n",
    "    lgbm_params = params.copy()\n",
    "    lgbmtrain = lgbm.Dataset(x_train, y_train)\n",
    "    cv_result = lgbm.cv(lgbm_params, lgbmtrain, num_boost_round=MAX_ROUNDS, \n",
    "                        nfold=3, metrics='auc', \n",
    "                        early_stopping_rounds=early_stopping_rounds, \n",
    "                        seed=40)\n",
    "    \n",
    "    print('best n_estimators:',len(cv_result['auc-mean']))\n",
    "    print('best cv score:', cv_result['auc-mean'][-1])\n",
    "    \n",
    "    return len(cv_result['auc-mean'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best n_estimators: 251\n",
      "best cv score: 0.7110965969436097\n"
     ]
    }
   ],
   "source": [
    "params = {'boosting_type':'goss',\n",
    "         'objective':'binary',\n",
    "         'n_jobs':-1,\n",
    "         'learning_rate':0.5,\n",
    "         'num_leaves': 50,\n",
    "         'max_depth':12,\n",
    "         'colsample_bytree':0.7,\n",
    "         'verbosity':5}\n",
    "n_estimators_1 = get_n_estimators(params, x_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. num_leaves & max_depth=12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 9 candidates, totalling 27 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CV] num_leaves=100 ..................................................\n",
      "[CV] ......... num_leaves=100, score=0.7127153791064413, total= 1.1min\n",
      "[CV] num_leaves=100 ..................................................\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.6min remaining:    0.0s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CV] ......... num_leaves=100, score=0.7134039116391391, total= 1.1min\n",
      "[CV] num_leaves=100 ..................................................\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  3.1min remaining:    0.0s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CV] ......... num_leaves=100, score=0.7113496897415811, total= 1.1min\n",
      "[CV] num_leaves=200 ..................................................\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  4.6min remaining:    0.0s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CV] ......... num_leaves=200, score=0.7217345906992811, total= 1.3min\n",
      "[CV] num_leaves=200 ..................................................\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  6.5min remaining:    0.0s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CV] ......... num_leaves=200, score=0.7227610288702917, total= 1.3min\n",
      "[CV] num_leaves=200 ..................................................\n",
      "[CV] ......... num_leaves=200, score=0.7217091421178199, total= 1.4min\n",
      "[CV] num_leaves=300 ..................................................\n",
      "[CV] ......... num_leaves=300, score=0.7273390158326646, total= 1.5min\n",
      "[CV] num_leaves=300 ..................................................\n",
      "[CV] .......... num_leaves=300, score=0.727248812167874, total= 1.5min\n",
      "[CV] num_leaves=300 ..................................................\n",
      "[CV] ......... num_leaves=300, score=0.7264231030632609, total= 1.6min\n",
      "[CV] num_leaves=400 ..................................................\n",
      "[CV] ......... num_leaves=400, score=0.7291629211799726, total= 1.7min\n",
      "[CV] num_leaves=400 ..................................................\n",
      "[CV] ......... num_leaves=400, score=0.7305186171560089, total= 1.6min\n",
      "[CV] num_leaves=400 ..................................................\n",
      "[CV] ......... num_leaves=400, score=0.7301943419418826, total= 1.6min\n",
      "[CV] num_leaves=500 ..................................................\n",
      "[CV] ........... num_leaves=500, score=0.73166333521766, total= 1.7min\n",
      "[CV] num_leaves=500 ..................................................\n",
      "[CV] ......... num_leaves=500, score=0.7326588632556407, total= 1.7min\n",
      "[CV] num_leaves=500 ..................................................\n",
      "[CV] ......... num_leaves=500, score=0.7319449753544459, total= 1.7min\n",
      "[CV] num_leaves=600 ..................................................\n",
      "[CV] ......... num_leaves=600, score=0.7338148747030802, total= 1.7min\n",
      "[CV] num_leaves=600 ..................................................\n",
      "[CV] ......... num_leaves=600, score=0.7328389271729748, total= 1.7min\n",
      "[CV] num_leaves=600 ..................................................\n",
      "[CV] ......... num_leaves=600, score=0.7331636785894545, total= 1.7min\n",
      "[CV] num_leaves=700 ..................................................\n",
      "[CV] ......... num_leaves=700, score=0.7332478667545625, total= 1.8min\n",
      "[CV] num_leaves=700 ..................................................\n",
      "[CV] ......... num_leaves=700, score=0.7356313493846884, total= 1.8min\n",
      "[CV] num_leaves=700 ..................................................\n",
      "[CV] ......... num_leaves=700, score=0.7341371756085914, total= 1.8min\n",
      "[CV] num_leaves=800 ..................................................\n",
      "[CV] ......... num_leaves=800, score=0.7344526327827259, total= 1.8min\n",
      "[CV] num_leaves=800 ..................................................\n",
      "[CV] ......... num_leaves=800, score=0.7345324331972343, total= 1.8min\n",
      "[CV] num_leaves=800 ..................................................\n",
      "[CV] ......... num_leaves=800, score=0.7347120164738697, total= 1.8min\n",
      "[CV] num_leaves=900 ..................................................\n",
      "[CV] ......... num_leaves=900, score=0.7330750625715597, total= 1.9min\n",
      "[CV] num_leaves=900 ..................................................\n",
      "[CV] ......... num_leaves=900, score=0.7346574398477523, total= 1.8min\n",
      "[CV] num_leaves=900 ..................................................\n",
      "[CV] ......... num_leaves=900, score=0.7347523381684705, total= 1.9min\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed: 65.9min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=30, shuffle=True),\n",
       "       error_score='raise-deprecating',\n",
       "       estimator=LGBMClassifier(boosting_type='goss', class_weight=None, colsample_bytree=0.7,\n",
       "        importance_type='split', learning_rate=0.1, max_depth=12,\n",
       "        min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n",
       "        n_estimators=251, n_jobs=-1, num_leaves=31, objective='binary',\n",
       "        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,\n",
       "        subsample=1.0, subsample_for_bin=200000, subsample_freq=0,\n",
       "        verbosity=5),\n",
       "       fit_params=None, iid='warn', n_jobs=None,\n",
       "       param_grid={'num_leaves': range(100, 1000, 100)},\n",
       "       pre_dispatch='2*n_jobs', refit=False, return_train_score='warn',\n",
       "       scoring='roc_auc', verbose=5)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "params = {'boosting_type':'goss',\n",
    "         'objective':'binary',\n",
    "         'n_jobs':-1,\n",
    "         'learning_rate':0.1,\n",
    "          'n_estimators':n_estimators_1,\n",
    "#          'num_leaves': 512,\n",
    "         'max_depth':12,\n",
    "         'colsample_bytree':0.7,\n",
    "         'verbosity':5}\n",
    "\n",
    "lg = LGBMClassifier(silent=False, **params)\n",
    "num_leaves_s = range(100, 1000, 100)\n",
    "tuned_parameters = dict(num_leaves = num_leaves_s)\n",
    "grid_search = GridSearchCV(lg, param_grid=tuned_parameters, cv=kfold, scoring='roc_auc', verbose=5, refit=False)\n",
    "grid_search.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7345656941116091\n",
      "{'num_leaves': 800}\n"
     ]
    }
   ],
   "source": [
    "print(grid_search.best_score_)\n",
    "print(grid_search.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\15067\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:125: FutureWarning: You are accessing a training score ('mean_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "C:\\Users\\15067\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:125: FutureWarning: You are accessing a training score ('std_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAELCAYAAADz6wBxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl8VfWd//HXh2xAwpYFZBMCsrizRBYXbGud2larba2CWsWltp2xP6vz60/7m9ZO/bUz05mOdhlbaxWpS0Wl2lJra61aRQUkKCigLCYsYQ1hS4AkJPn8/jgneIEshOTk3CTv5+ORR8793u+993NvLvfN+d7zPV9zd0RERI5Xt7gLEBGRjk1BIiIiraIgERGRVlGQiIhIqyhIRESkVRQkIiLSKgoSERFpFQWJiIi0ioJERERaJTXuAtpDbm6uDx8+PO4yREQ6lCVLluxw97zm+nWJIBk+fDiFhYVxlyEi0qGY2fpj6aehLRERaZVIg8TMLjKzVWa21szubOD6e81safiz2sx2h+3DzGxJ2L7CzL6WcJu/h/dZf7v+UT4HERFpWmRDW2aWAtwHXAiUAIvNbJ67r6zv4+63JfT/BjA+vLgFONvdq8wsC1ge3nZzeP3V7q6xKhGRJBDlHskkYK27F7l7NTAHuLSJ/jOAJwDcvdrdq8L2jIjrFBGRVojyA3owsDHhcknYdhQzGwbkAy8ntA01s3fD+/hRwt4IwMPhsNZ3zcwauc+bzazQzApLS0tb+1xERKQRUQZJQx/wja2iNR2Y6+61hzq6b3T3M4CTgOvMbEB41dXufjpwXvjz5Ybu0N0fcPcCdy/Iy2v26DURETlOUQZJCTA04fIQYHMjfacTDmsdKdwTWUEQGrj7pvB3OfBbgiE0ERGJSZTzSBYDo8wsH9hEEBZXHdnJzMYA/YAFCW1DgDJ3P2Bm/YBzgHvMLBXo6+47zCwNuBj4W4TPQUQEd2d/dS0VVTWUV9awr6rmsO3q2jqyM9Pp3yuDvF4Z5GZl0D0tJe6y201kQeLuNWZ2C/ACkALMcvcVZnY3UOju88KuM4A5fvji8ScD/21mTjBE9mN3f8/MMoEXwhBJIQiRX0f1HESk43J3qmrqqKiqoaIy+OA/aruB6/ZVBwFRURUGRmUNFdU1eGMD843o0yONvF4Zh8IlLyuD/r3rt7sH21kZ9O2ZRiNf9XYY5i19dTqggoIC18x2kY6rpraOlVv2snLz3kN7AvUf9OX1AZC4l1AdtNXUNf/51s0gKyOVXt3TyMxIISsjlazuaWSF25kZqfTKSCWre7Ad9E0lMz1o65WRRmqKUVZRTWlFJaXlVWzfW0VpRVWwXV7/u5LKg3VHPX5aipGXFQZMrwzyenU/PIDC7Tj2csxsibsXNNevS5wiRUQ6lto6Z+XmvSwsKmNBURmLi3dSXlVzWJ/M9BSyugcf7FnhB31OVs+jPvgPbSd88GdmpBza7p7WrU32CAb17QH0afR6d6eiquawcDlsu6KKkl0HWLpxN2X7qhvcA+rdPZX+vbsfCp76sAn2bj4KoPbey1GQiEjsauuc97eEwfFhGW+t20l5ZRAcI3IzufjMQUwdmcP4oX3p2zONzPRUunXrWMNBZkav7mn06p7GiLysJvvW1NZRtq86IWwqjwqeZSW72b63igMHa4+6fVqKkZsVhMpPp49neG5mVE8LUJCISAwSg2NhURmLij8KjvzcTC4+YxBTRmQzZUQOA3p3j7na9pea0o0Bvbs3+9zdnX3VteFwWiWlFUcPq/XMiH44TEEiIpGrq3Pe37qXhUU7gz2O4jL2HhYcA5kyIofJ+Tmc0KfrBcfxMrNDQ3v5Ee91NEVBIiJtrq7O+WBr+aHvON4q3smeAwcBGJ7Tk8+cHgbHiGwG9ukRc7XSWgoSEWm1ujpn1bbyw77j2L0/CI5hOT256NQTmDIym8n5OeGX0tKZKEhE
pMXq6pzV28tZ+GGwx7Go+KPgODG7J/9wyoBwjyOHwQqOTk9BIiLNqqtz1myvOLTHsai4jF1hcAzN7sGFJw84NFQ1pF/PmKuV9qYgEZGjuB8ZHDvZua8agCH9enBBfXDkZzM0W8HR1SlIRASA7XsreW3NDl5dXcqba3dQFgbH4L49+PiY/ocOx1VwyJEUJCJdVFVNLUvW7eLVNaW8uqqUD7aWA5CblcH5o/OYMjKHqQoOOQYKEpEuwt1ZV7af11aX8trqUhYUlbG/upa0FKNgWDZ3XDSW80fncfLAXh3+JILSvhQkIp1YRVUNb67dwWtrSnlt9Q427NwPBHM5Lp84hGmj8pg6MofMDH0UyPHTu0ekE6mrc1Zu2cur4V7HkvW7qKlzeqancPbIXL5yXj7TRucxLCe+WdDS+ShIRDq4HRVVzA/3OOavKWVHRfAl+SkDe/OVaSOYNiqPicP6kZ4a5YKo0pUpSEQ6mIO1dby9flew17GmlOWb9gKQnZnOtFG5TBudx7mjcunfS+eskvahIBHpADbu3M+rq0t5dXUpCz4so6KqhtRuxoQT+/GtT41h2qg8Th3Uu8OdWl06BwWJSBLaX13DwqIyXl1VymtrdlC8Yx8QTAa8dNwgpo3O4+yROfTqnhZzpSIKEpGk4B6cLfe1cK+jcN0uqmvr6JGWwpQR2Vw3dRjTRueRn5upQ3Ml6ShIRGKyr6qGlz7Yfmhex/byKgDGntCLmecM5/zReRQM70dGavuu0y3SUgoSkXa2r6qGRxeu54HXiti5r5q+PdM496TgS/Jpo/K0sJN0OAoSkXZyZIB8bEweXz9/JAXDs0nRl+TSgSlIRCK2v7qGRxes51dhgJw/Oo9bPzmKCSf2i7s0kTahIBGJyP7qGh5buJ5fvVpEmQJEOjEFiUgbOzJApo3O49YLRjFxmAJEOicFiUgbOVBdGwTIax+yo6Ka80bl8s1PjmLisOy4SxOJlIJEpJUOVNfy+KL13P+qAkS6JgWJyHFqKEBuvWAUBcMVINK1KEhEWuijACliR0UV556Uy62fHMVZChDpohQkIseo8mAtjy/awP2vfkhpeRXnnJTDLy6YwKR8BYh0bQoSkWY0FCD3XaUAEamnIBFpROXBWn67aAO/DAPk7JE5/M+M8UwekRN3aSJJRUEicoTKg7U88dYGfvn3D9leXsXUETn8fMZ4pihARBqkIBEJHRkgU0Zk8zMFiEizFCTS5VUerGXOW8EQ1ra9VUzOz+an08czdaQCRORYRBokZnYR8FMgBXjQ3f/jiOvvBT4eXuwJ9Hf3vmY2DHgmvF0a8HN3vz+8zURgNtADeB641d09yuchnVPlwVqeXLyRX/x9Ldv2VjEpP5ufXKkAEWmpyILEzFKA+4ALgRJgsZnNc/eV9X3c/baE/t8AxocXtwBnu3uVmWUBy8PbbgZ+CdwMLCQIkouAP0f1PKTzqTxYy1OFG/nFKx+ydW8lk4Znc++V4zh7ZG7cpYl0SFHukUwC1rp7EYCZzQEuBVY20n8G8D0Ad69OaM8AuoX3MRDo7e4LwsuPAJehIJFj0FCA3HPlmUwdkaPla0VaIcogGQxsTLhcAkxuqGM4lJUPvJzQNhT4E3AS8C1332xmBeH9JN7n4DauWzqZqppanlq8kfvCADlreD/uueJMpo5UgIi0hSiDpKF/oY19lzEdmOvutYc6um8EzjCzQcDvzWxuS+7TzG4mGALjxBNPbEnd0oms2LyH255cyuptFRQM68d/X3EmZytARNpUlEFSAgxNuDwE2NxI3+nAPzV0RbgnsgI4D3gjvJ9m79PdHwAeACgoKNCX8V1MbZ3zwGtF3PPiKvr2TOeh6wr4xNj+ChCRCEQZJIuBUWaWD2wiCIurjuxkZmOAfsCChLYhQJm7HzCzfsA5wD3uvsXMys1sCrAIuBb4eYTPQTqgjTv3c/tTS1m8bhefPu0E/u3zp9MvMz3uskQ6rciCxN1rzOwW4AWCw3hnufsK
M7sbKHT3eWHXGcCcIw7hPRn4bzNzguGsH7v7e+F1X+ejw3//jL5ol5C78/SSEr4/bwXdzLjnijP5/PjB2gsRiZh1hSkYBQUFXlhYGHcZEqGyiiq+/cx7/HXlNqaMyObHXzqTIf16xl2WSIdmZkvcvaC5fprZLh3eS+9v447fvcveAzV857Mnc8M5+XTrpr0QkfaiIJEOa19VDT/40/s88dYGxp7Qi8dumszYE3rHXZZIl6MgkQ5pyfpd3P7UUjbs3M9Xzx/B7ReOJiM1Je6yRLokBYl0KAdr6/jZS2u475W1DOzTgzlfmaL1QURipiCRDmPt9gpue3Ip723aw+UTh/C9S06hV/e0uMsS6fIUJJL06uqcRxeu59+ef5+e6Sncf80ELjptYNxliUhIQSJJbeueSr41dxnz1+zg42Py+NHlZ9C/V/e4yxKRBAoSSVrPvbuZf3l2OdU1dfzw86dx1aQTNblQJAkpSCTp7DlwkO/9YTm/X7qZcUP7cu+V48jPzYy7LBFphIJEksqba3fwz08vY3t5FbdfOJp//NhIUlO6xV2WiDRBQSJJofJgLf/1wioeer2YEbmZPPP1szlzaN+4yxKRY6Agkdglrhly7dRhfPvTJ9MjXZMLRToKBYnEJnHNkH490/nNDZM4f3Re3GWJSAspSCQWG8r2889PB2uGfPb0gfzgstO0ZohIB6UgkXbl7jxdWML3/7iCbt2Mn1w5jkvHDdJhvSIdmIJE2s2OcM2QF1duY+qIHH58xZkM7tsj7rJEpJUUJNIu/rZyG3c+ozVDRDojBYlEKlgzZCVPvLWRkwf25vGbxjHmhF5xlyUibUhBIpFJXDPka+eP5LYLR2nNEJFOSEEibS5xzZBBfXvw5M1TmZSfHXdZIhIRBYm0qbXby7ntyWW8t2kPX5o4hLu0ZohIp6cgkTbz+3c2ccfv3iUzI5X7r5nIRaedEHdJItIOFCTSJuavKeWfn15GwbB+/Pyq8VozRKQLUZBIq63aWs4/PvY2o/pn8eB1BRrKEulidH5uaZXt5ZXcMHsxPdJTmDXzLIWISBekPRI5bgeqa7npN4Xs3FfNU1+dyiDNUhfpkhQkclxq65xb57zDe5v28OsvF3D6kD5xlyQiMdHQlhyXf3/+ff66cht3XXwKnzxlQNzliEiMFCTSYo8uWMeDrxcz8+zhXH9OftzliEjMFCTSIq98sJ3vzVvBJ0/uz3cvPiXuckQkCShI5Jit2LyHW377NicP7M1Pp48nRWfvFREUJHKMtuw5wI2zC+ndI41ZM88iM0PHaYhIQEEizaqoquHG2YWUVx5k1syzGNBbs9ZF5CP6b6U0qaa2jm/89m1WbSvnoesKOHlg77hLEpEkoz0SaZS7c/dzK3llVSl3X3oqHxvTP+6SRCQJKUikUbPeWMcjC9Zz87QRXD15WNzliEiSijRIzOwiM1tlZmvN7M4Grr/XzJaGP6vNbHfYPs7MFpjZCjN718yuTLjNbDMrTrjduCifQ1f1woqt/OBPK7no1BO486KxcZcjIkkssu9IzCwFuA+4ECgBFpvZPHdfWd/H3W9L6P8NYHx4cT9wrbuvMbNBwBIze8Hdd4fXf8vd50ZVe1f3bslubp3zDmcM6cu9V46jmw7zFZEmRLlHMglY6+5F7l4NzAEubaL/DOAJAHdf7e5rwu3NwHYgL8JaJVSyaz83/qaQ3KwMHry2gB7pWmNdRJoWZZAMBjYmXC4J245iZsOAfODlBq6bBKQDHyY0/zAc8rrXzDLaruSubW/lQW6YvZjKg7U8PPMs8nrppRWR5kUZJA2Nh3gjfacDc9299rA7MBsIPApc7+51YfO3gbHAWUA2cEeDD252s5kVmllhaWnp8dTfpRysreOfHn+botJ93H/NREYN6BV3SSLSQUQZJCXA0ITLQ4DNjfSdTjisVc/MegN/Ar7j7gvr2919iweqgIcJhtCO4u4PuHuBuxfk5WlUrCnuznd/v5z5a3bwb184
nXNOyo27JBHpQKIMksXAKDPLN7N0grCYd2QnMxsD9AMWJLSlA88Cj7j700f0Hxj+NuAyYHlkz6CLuP/VIuYs3sgtHz+JKwqGNn8DEZEEkR215e41ZnYL8AKQAsxy9xVmdjdQ6O71oTIDmOPuicNeVwDTgBwzmxm2zXT3pcDjZpZHMHS2FPhaVM+hK/jTu1v40V8+4JIzB3H7haPjLkdEOiA7/PO7cyooKPDCwsK4y0g6S9bvYsavF3LG4D48dtNkuqfpCC0R+YiZLXH3gub6NTq0ZWafMrPLG2i/2swubG2BEq8NZfv5yiOFDOzTnQeuLVCIiMhxa+o7ku8DrzbQ/hJwdzTlSHvYs/8gM2e/RZ07D888i+zM9LhLEpEOrKkg6enuRx036+5bgczoSpIoVdfU8dXHCinZeYAHvlzAiLysuEsSkQ6uqSDpbmZHfRlvZmlAj+hKkqi4O3c+8y4Li3byn5efwaT87LhLEpFOoKkgeQb4tZkd2vsIt+8Pr5MO5mcvreWZtzdx2ydHc9n4Bk8yICLSYk0FyXeAbcB6M1tiZm8D64DS8DrpQJ59p4R7/7aaL0wYzP+64KS4yxGRTqTReSTuXgPcaWbfB+o/eda6+4F2qUzazKKiMu6Y+x5TRmTzH184g2Aup4hI22g0SMzsC0c0OdDXzJa6e3m0ZUlbKSqt4KuPLWFIdg9+dU0B6alay0xE2lZTM9svaaAtGzjDzG5096PO1CvJZee+aq6fvZgUM2bPnESfnmlxlyQinVBTQ1vXN9QenvL9KWByVEVJ61UerOXmRwrZsqeSJ74yhRNzesZdkoh0Ui0e53D39YD+a5vE6uqcb819l8L1u7j3inFMHNYv7pJEpBNrcZCY2VigKoJapI3c8+Jq/rhsM3dcNJbPnjEw7nJEpJNr6sv2P3L0QlTZwEDgmiiLkuP31OKN/M8ra5l+1lC+dv6IuMsRkS6gqS/bf3zEZQd2EoTJNSSsHyLJ4Y21O/i/z77HeaNy+X+XnabDfEWkXTT1ZfuhEzaa2TjgKoJ1QoqB30VfmrTEmm3lfO2xJYzIy+S+qyeQlqLDfEWkfTQ1tDWaYFXDGUAZ8CTB+iUfb6fa5BiVlldx/ezFdE9LYdbMs+jdXcdCiEj7aWpo6wNgPnCJu68FMLPb2qUqOWYHqmu56ZFCdlRU8dRXpzKknw7zFZH21dT4xxeBrcArZvZrM7uAYHlbSRJ1dc5tTy7l3ZLd/HT6eM4Y0jfukkSkC2o0SNz9WXe/EhgL/B24DRhgZr80s39op/qkCf/xlw/4y4qt/MtnTuZTp54Qdzki0kU1+42su+9z98fd/WJgCLAUuDPyyqRJjy1czwOvFXHt1GHceG5+3OWISBfWokN73H2nu//K3T8RVUHSvNdWl/K9eSv4+Jg87rr4FB3mKyKx0jGiHUxdnfOv81aQn5vJz6+aQKoO8xWRmOlTqIP5++rtFO3Yxzc+cRJZGU0ddCci0j4UJB3Mg/OLOaF3dz5zus6hJSLJQUHSgazcvJc3PyzjurOHa+a6iCQNfRp1IA+9XkyPtBSumnRi3KWIiByiIOkgtu+tZN6yTXypYIhWOhSRpKIg6SAeXbiemjrn+nM0Z0REkouCpAOoPFjLYwvXc8HYAeTnZsZdjojIYRQkHcAzb29i1/6D3HSe9kZEJPkoSJJcXZ3z0OtFnDqoN5Pzs+MuR0TkKAqSJPfqmlI+LN3HTefl61QoIpKUFCRJ7qH5xQzoncFnTx8UdykiIg1SkCSxD7bu5fW1O7h26nDSU/WnEpHkpE+nJPbQ/GAC4tWTNQFRRJKXgiRJlZZX8Yelm/nixMH07ZkedzkiIo2KNEjM7CIzW2Vma83sqMWwzOxeM1sa/qw2s91h+zgzW2BmK8zsXTO7MuE2+Wa2yMzWmNmTZtYpP2UfXbie6to6btAERBFJcpEFiZmlAPcBnwZOAWaY2SmJfdz9Nncf5+7jgJ8Dz4RX7QeudfdTgYuAn5hZ
/YLkPwLudfdRwC7gxqieQ1wqD9by+ML1XDC2PyPysuIuR0SkSVHukUwC1rp7kbtXA3OAS5voPwN4AsDdV7v7mnB7M7AdyLPg+NdPAHPD2/wGuCyi+mPz+3c2Ubavmhs1AVFEOoAog2QwsDHhcknYdhQzGwbkAy83cN0kIB34EMgBdrt7TXP32VG5Ow+9XszJA3szdURO3OWIiDQryiBpaPacN9J3OjDX3WsPuwOzgcCjwPXuXteS+zSzm82s0MwKS0tLW1B2vF5bs4M12yu46VxNQBSRjiHKICkBhiZcHgJsbqTvdMJhrXpm1hv4E/Add18YNu8A+ppZ/Rqzjd6nuz/g7gXuXpCXl3ecT6H9PTi/iP69MrjkTE1AFJGOIcogWQyMCo+ySicIi3lHdjKzMUA/YEFCWzrwLPCIuz9d3+7uDrwCXB42XQf8IbJn0M5WbS1n/podXDt1mCYgikiHEdmnVfg9xi3AC8D7wFPuvsLM7jazzyV0nQHMCUOi3hXANGBmwuHB48Lr7gBuN7O1BN+ZPBTVc2hvs14vpntaN66aPCzuUkREjllq812On7s/Dzx/RNtdR1z+1wZu9xjwWCP3WURwRFinsqOiimeXbuLyiUPIzuyUU2NEpJPS+EmSeGzheqprNAFRRDoeBUkSqF8B8eNj8jipvyYgikjHoiBJAvOWbmZHRTU3nTci7lJERFpMQRIzd+fB14sYe0Ivzh6pCYgi0vEoSGL2+todrN5WwY2agCgiHZSCJGYPzi8mNyuDz43TBEQR6ZgUJDFas62cV1eXcu3UYWSkpsRdjojIcVGQxGjWG8VkpHbTCogi0qEpSGJSVlHFM29v4gsTBpOTlRF3OSIix01BEpPHF22gShMQRaQTUJDEoKqmlkcWrOf80XmMGtAr7nJERFpFQRKDYAJiFTdpBUQR6QQUJO2sfgXEMQN6ce5JuXGXIyLSagqSdvbmh2V8sLVcExBFpNNQkLSzB+cXkZuVrgmIItJpKEja0drtFbyyqpRrpgyje5omIIpI56AgaUez3igmPbUb10zRCogi0nkoSNrJzn3VPPN2CZ8fN5hcTUAUkU5EQdJOfrtoPZUH67hRh/yKSCejIGkHVTW1/GbBes4blctoTUAUkU5GQdIOnlu2hdLyKq2AKCKdkoIkYvUTEEf1z2LaKE1AFJHOR0ESsQVFZazcslcTEEWk01KQROyh+cXkZKZz2fjBcZciIhIJBUmEikoreOmD7VytCYgi0okpSCI0641i0lO68WVNQBSRTkxBEpHd+6uZu6SES8cNIq+XJiCKSOelIInI44s2aAKiiHQJCpIIVNfU8ciCdZx7Ui5jT+gddzkiIpFSkETgT+9tZtveKu2NiEiXoCBpY/UTEE/qn8X5o/LiLkdEJHIKkja2qHgnyzft5YZz8unWTRMQRaTzU5C0sQfnF9OvZxpfmKAJiCLSNShI2lDxjn289ME2rYAoIl2KgqQNPfxGMWnduvHlqZqAKCJdh4KkjezZf5CnC0u45MxB9O/VPe5yRETaTaRBYmYXmdkqM1trZnc2cP29ZrY0/FltZrsTrvuLme02s+eOuM1sMytOuN24KJ/DsfrtWxs4cLCWG8/VIb8i0rWkRnXHZpYC3AdcCJQAi81snruvrO/j7rcl9P8GMD7hLv4L6Al8tYG7/5a7z42k8ONwsLaO37y5jnNOyuGUQZqAKCJdS5R7JJOAte5e5O7VwBzg0ib6zwCeqL/g7i8B5RHW12aef28LW/dWam9ERLqkKINkMLAx4XJJ2HYUMxsG5AMvH+N9/9DM3g2HxmI9I2L9BMQReZl8bHT/OEsREYlFlEHS0Gw8b6TvdGCuu9cew/1+GxgLnAVkA3c0+OBmN5tZoZkVlpaWHku9x2Xxul28W7JHExBFpMuKMkhKgKEJl4cAmxvpO52EYa2muPsWD1QBDxMMoTXU7wF3L3D3gry86E5V8uD8Ivr2TOOLE4ZE9hgiIsksyiBZDIwy
s3wzSycIi3lHdjKzMUA/YMGx3KmZDQx/G3AZsLzNKm6h9WX7ePH9bVw9+UR6pGsCooh0TZEdteXuNWZ2C/ACkALMcvcVZnY3UOju9aEyA5jj7ocNe5nZfIIhrCwzKwFudPcXgMfNLI9g6Gwp8LWonkNzHn5jHandjGunDo+rBBGR2EUWJADu/jzw/BFtdx1x+V8bue15jbR/oq3qa409Bw7yVOFGLjljEAN6awKiiHRdmtl+nOa8tYH91bXcoEN+RaSLU5Ach/oJiFNH5HDa4D5xlyMiEisFyXH48/KtbN6jCYgiIqAgaTF356H5ReTnZvKJsZqAKCKiIGmhJet3saxkDzecM1wTEEVEUJC02IPzi+nTI40vTtQERBERUJC0yIay/fx15VaumnwiPdMjPXJaRKTDUJC0wMNvFtPNjOs0AVFE5BAFyTHaW3mQpxZv5OIzBnJCH01AFBGppyA5Rk++tZF91bXceO6IuEsREUkqCpJjUFNbx+w31zE5P5vTh2gCoohIIgXJMfjLiq1s2n1AExBFRBqgIDkGD71ezPCcnlxw8oC4SxERSToKkmYsWb+Ldzbs5vpz8knRBEQRkaMoSJox6/ViendP5XJNQBQRaZCCpAkbd+7nz8u3MGPyiWRmaAKiiEhDFCRNmP3mOrqZMfPs4XGXIiKStBQkTUhNMb44YQgD+/SIuxQRkaSl8ZomfPvTJ8ddgohI0tMeiYiItIqCREREWkVBIiIiraIgERGRVlGQiIhIqyhIRESkVRQkIiLSKgoSERFpFXP3uGuInJmVAuuP8+a5wI42LKetqK6WUV0to7paprPWNczd85rr1CWCpDXMrNDdC+Ku40iqq2VUV8uorpbp6nVpaEtERFpFQSIiIq2iIGneA3EX0AjV1TKqq2VUV8t06br0HYmIiLSK9khERKRVunyQmNksM9tuZssT2rLN7EUzWxP+7he2m5n9zMzWmtm7ZjYhopqGmtkrZva+ma0ws1uTpK7uZvaWmS0L6/p+2J5vZovCup40s/SwPSO8vDa8fngUdSXUl2Jm75jZc0lW1zoze8/MlppZYdgW698yfKy+ZjbXzD4I32tT467LzMaEr1P9z14z+2bcdYWPdVv4vl9uZk+E/x5if49efDXwAAAGrUlEQVSZ2a1hTSvM7JthW/u+Xu7epX+AacAEYHlC238Cd4bbdwI/Crc/A/wZMGAKsCiimgYCE8LtXsBq4JQkqMuArHA7DVgUPt5TwPSw/X7g6+H2PwL3h9vTgScj/lveDvwWeC68nCx1rQNyj2iL9W8ZPtZvgJvC7XSgbzLUlVBfCrAVGBZ3XcBgoBjokfDemhn3eww4DVgO9CRYqPBvwKj2fr0ifSN0lB9gOIcHySpgYLg9EFgVbv8KmNFQv4jr+wNwYTLVFb5x3wYmE0x4Sg3bpwIvhNsvAFPD7dSwn0VUzxDgJeATwHPhP5TY6wofYx1HB0msf0ugd/jBaMlU1xG1/APwRjLURRAkG4Hs8D3zHPCpuN9jwJeABxMufxf4P+39enX5oa1GDHD3LQDh7/5he/2bqV5J2BaZcJd4PMH//mOvKxw+WgpsB14EPgR2u3tNA499qK7w+j1AThR1AT8h+AdUF17OSZK6ABz4q5ktMbObw7a4/5YjgFLg4XA48EEzy0yCuhJNB54It2Oty903AT8GNgBbCN4zS4j/PbYcmGZmOWbWk2CPYyjt/HopSFrGGmiL7LA3M8sCfgd80933NtW1gbZI6nL3WncfR7AHMAloaGH7+sdul7rM7GJgu7svSWyOu64E57j7BODTwD+Z2bQm+rZXbakEQ7q/dPfxwD6CIZC46woeLPiu4XPA0811baAtivdYP+BSIB8YBGQS/D0be+x2qcvd3wd+RPCfur8Ay4CaJm4SSV0KkoZtM7OBAOHv7WF7CUHa1xsCbI6iADNLIwiRx939mWSpq5677wb+TjDO2tfMUht47EN1hdf3AXZGUM45wOfMbB0w
h2B46ydJUBcA7r45/L0deJYggOP+W5YAJe6+KLw8lyBY4q6r3qeBt919W3g57ro+CRS7e6m7HwSeAc4mCd5j7v6Qu09w92nhY6yhnV8vBUnD5gHXhdvXEXxHUd9+bXjkwxRgT/3uY1syMwMeAt5393uSqK48M+sbbvcg+Mf1PvAKcHkjddXXeznwsocDs23J3b/t7kPcfTjBcMjL7n513HUBmFmmmfWq3yYY919OzH9Ld98KbDSzMWHTBcDKuOtKMIOPhrXqHz/OujYAU8ysZ/jvs/71Sob3WP/w94nAFwhet/Z9vdr6y5+O9hO+6FuAgwRpfSPBWOZLBMn+EpAd9jXgPoLvBd4DCiKq6VyC3c13gaXhz2eSoK4zgHfCupYDd4XtI4C3gLUEQxEZYXv38PLa8PoR7fD3/BgfHbUVe11hDcvCnxXAv4Ttsf4tw8caBxSGf8/fA/2SpK6eQBnQJ6EtGer6PvBB+N5/FMhIkvfYfIJQWwZcEMfrpZntIiLSKhraEhGRVlGQiIhIqyhIRESkVRQkIiLSKgoSERFpFQWJiIi0ioJEJAZmNtvMLm++p0jyU5CIiEirKEhEQmY23IIFnn4dLhL0VzPrYWZ/N7OCsE9ueE4vzGymmf3ezP5oZsVmdouZ3R6eTXehmWUf4+NONLNXw7MDv5BwjqSvmNliCxYS+114eo4+FiyU1S3s09PMNppZmpmNNLO/hPcz38zGhn2+ZMHCR8vM7LVIXjzp0hQkIocbBdzn7qcCu4EvNtP/NOAqghMx/hDY78HZdBcA1zb3YOHJOX8OXO7uE4FZ4f0APOPuZ7n7mQTnNLvR3fcQnArj/LDPJQRrYBwEHgC+Ed7P/wZ+Efa5C/hUeD+fa64mkZZKbb6LSJdS7O5Lw+0lBIueNeUVdy8Hys1sD/DHsP09gnOTNWcMQRi9GJwLkBSCc78BnGZmPyBYuTCLYLEkgCeBKwlOGDgd+EW45MDZwNPh/UBwLiiAN4DZZvYUwVlrRdqUgkTkcFUJ27VAD4L1Her33rs30b8u4XIdx/bvy4AV7j61getmA5e5+zIzm0lwQkoIzuD67+HQ2UTgZYL1MXZ7sFbMYdz9a2Y2GfgssNTMxrl72THUJnJMNLQl0rx1BB/Y8NEpw9vKKiDPzKZCMNRlZqeG1/UCtoTDX1fX38DdKwjOKPtTgjMd13qw8FmxmX0pvB8zszPD7ZHuvsjd7yJY8jVxPQqRVlOQiDTvx8DXzexNILct79jdqwnC6UdmtoxgyYCzw6u/S7DE8osEpy9P9CRwTfi73tXAjeH9rCBY0Q/gv8zsPTNbDrxG8B2LSJvRaeRFRKRVtEciIiKtoi/bRSJkZvcRrCmf6Kfu/nAc9YhEQUNbIiLSKhraEhGRVlGQiIhIqyhIRESkVRQkIiLSKgoSERFplf8PBiUe1nzkc+sAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "# Plot the mean cross-validated AUC for each num_leaves candidate.\n",
    "# Only test-fold scores are read here: the train-score keys\n",
    "# ('mean_train_score' / 'std_train_score') are present in cv_results_\n",
    "# only when the search was built with return_train_score=True, so looking\n",
    "# them up warns on sklearn 0.20 and raises KeyError from 0.21 onwards.\n",
    "test_means = grid_search.cv_results_['mean_test_score']\n",
    "\n",
    "x_axis = num_leaves_s\n",
    "plt.plot(x_axis, test_means)\n",
    "plt.xlabel('num_leaves')\n",
    "plt.ylabel('AUC')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. min_child_samples\n",
    "叶子节点的最小样本数目\n",
    "\n",
    "叶子节点数目：800，共2类，平均每类400个叶子节点\n",
    "每棵树的样本数目最少的类（稀有事件）的样本数目：350w * 2/3 = 233.3w\n",
    "所以每个叶子节点约 233.3w / 400 ≈ 0.58w 个样本点\n",
    "\n",
    "搜索范围：3000-6000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 3 candidates, totalling 9 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done   4 out of   9 | elapsed:  9.6min remaining: 12.0min\n",
      "[Parallel(n_jobs=4)]: Done   6 out of   9 | elapsed: 16.6min remaining:  8.3min\n",
      "[Parallel(n_jobs=4)]: Done   9 out of   9 | elapsed: 20.0min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=30, shuffle=True),\n",
       "       error_score='raise-deprecating',\n",
       "       estimator=LGBMClassifier(boosting_type='goss', class_weight=None, colsample_bytree=0.7,\n",
       "        importance_type='split', learning_rate=0.1, max_depth=12,\n",
       "        min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n",
       "        n_estimators=251, n_jobs=4, num_leaves=800, objective='binary',\n",
       "        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,\n",
       "        subsample=1.0, subsample_for_bin=200000, subsample_freq=0,\n",
       "        verbosity=5),\n",
       "       fit_params=None, iid='warn', n_jobs=4,\n",
       "       param_grid={'min_child_samples': range(3000, 6000, 1000)},\n",
       "       pre_dispatch='2*n_jobs', refit=False, return_train_score='warn',\n",
       "       scoring='roc_auc', verbose=5)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Coarse grid over min_child_samples (3000, 4000, 5000); all other\n",
    "# LightGBM settings stay fixed while this one parameter is tuned.\n",
    "params = dict(\n",
    "    boosting_type='goss',\n",
    "    objective='binary',\n",
    "    n_jobs=4,\n",
    "    learning_rate=0.1,\n",
    "    n_estimators=n_estimators_1,\n",
    "    num_leaves=800,\n",
    "    max_depth=12,\n",
    "    colsample_bytree=0.7,\n",
    "    verbosity=5,\n",
    ")\n",
    "lg = LGBMClassifier(silent=False, **params)\n",
    "\n",
    "min_child_samples_s = range(3000, 6000, 1000)\n",
    "tuned_parameters = {'min_child_samples': min_child_samples_s}\n",
    "\n",
    "# refit=False: only the CV scores are needed, no final model is trained.\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=lg,\n",
    "    param_grid=tuned_parameters,\n",
    "    scoring='roc_auc',\n",
    "    cv=kfold,\n",
    "    n_jobs=4,\n",
    "    refit=False,\n",
    "    verbose=5,\n",
    ")\n",
    "grid_search.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7093970307088159\n",
      "{'min_child_samples': 3000}\n"
     ]
    }
   ],
   "source": [
    "# Best mean CV AUC, then the parameter value that produced it.\n",
    "print(grid_search.best_score_, grid_search.best_params_, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 3 candidates, totalling 9 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done   4 out of   9 | elapsed: 11.5min remaining: 14.3min\n",
      "[Parallel(n_jobs=4)]: Done   6 out of   9 | elapsed: 17.6min remaining:  8.8min\n",
      "[Parallel(n_jobs=4)]: Done   9 out of   9 | elapsed: 22.3min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=30, shuffle=True),\n",
       "       error_score='raise-deprecating',\n",
       "       estimator=LGBMClassifier(boosting_type='goss', class_weight=None, colsample_bytree=0.7,\n",
       "        importance_type='split', learning_rate=0.1, max_depth=12,\n",
       "        min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n",
       "        n_estimators=251, n_jobs=4, num_leaves=800, objective='binary',\n",
       "        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,\n",
       "        subsample=1.0, subsample_for_bin=200000, subsample_freq=0,\n",
       "        verbosity=5),\n",
       "       fit_params=None, iid='warn', n_jobs=4,\n",
       "       param_grid={'min_child_samples': range(1000, 4000, 1000)},\n",
       "       pre_dispatch='2*n_jobs', refit=False, return_train_score='warn',\n",
       "       scoring='roc_auc', verbose=5)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shift the grid downwards: min_child_samples in (1000, 2000, 3000),\n",
    "# with every other LightGBM setting held fixed.\n",
    "params = dict(\n",
    "    boosting_type='goss',\n",
    "    objective='binary',\n",
    "    n_jobs=4,\n",
    "    learning_rate=0.1,\n",
    "    n_estimators=n_estimators_1,\n",
    "    num_leaves=800,\n",
    "    max_depth=12,\n",
    "    colsample_bytree=0.7,\n",
    "    verbosity=5,\n",
    ")\n",
    "lg = LGBMClassifier(silent=False, **params)\n",
    "\n",
    "min_child_samples_s = range(1000, 4000, 1000)\n",
    "tuned_parameters = {'min_child_samples': min_child_samples_s}\n",
    "\n",
    "# refit=False: only the CV scores are needed, no final model is trained.\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=lg,\n",
    "    param_grid=tuned_parameters,\n",
    "    scoring='roc_auc',\n",
    "    cv=kfold,\n",
    "    n_jobs=4,\n",
    "    refit=False,\n",
    "    verbose=5,\n",
    ")\n",
    "grid_search.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7195478899404906\n",
      "{'min_child_samples': 1000}\n"
     ]
    }
   ],
   "source": [
    "# Best mean CV AUC, then the parameter value that produced it.\n",
    "print(grid_search.best_score_, grid_search.best_params_, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  12 out of  15 | elapsed: 35.9min remaining:  9.0min\n",
      "[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed: 40.7min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=30, shuffle=True),\n",
       "       error_score='raise-deprecating',\n",
       "       estimator=LGBMClassifier(boosting_type='goss', class_weight=None, colsample_bytree=0.7,\n",
       "        importance_type='split', learning_rate=0.1, max_depth=12,\n",
       "        min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n",
       "        n_estimators=251, n_jobs=4, num_leaves=800, objective='binary',\n",
       "        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,\n",
       "        subsample=1.0, subsample_for_bin=200000, subsample_freq=0,\n",
       "        verbosity=5),\n",
       "       fit_params=None, iid='warn', n_jobs=4,\n",
       "       param_grid={'min_child_samples': range(500, 1000, 100)},\n",
       "       pre_dispatch='2*n_jobs', refit=False, return_train_score='warn',\n",
       "       scoring='roc_auc', verbose=5)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Finer grid below 1000: min_child_samples in 500, 600, ..., 900,\n",
    "# with every other LightGBM setting held fixed.\n",
    "params = dict(\n",
    "    boosting_type='goss',\n",
    "    objective='binary',\n",
    "    n_jobs=4,\n",
    "    learning_rate=0.1,\n",
    "    n_estimators=n_estimators_1,\n",
    "    num_leaves=800,\n",
    "    max_depth=12,\n",
    "    colsample_bytree=0.7,\n",
    "    verbosity=5,\n",
    ")\n",
    "lg = LGBMClassifier(silent=False, **params)\n",
    "\n",
    "min_child_samples_s = range(500, 1000, 100)\n",
    "tuned_parameters = {'min_child_samples': min_child_samples_s}\n",
    "\n",
    "# refit=False: only the CV scores are needed, no final model is trained.\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=lg,\n",
    "    param_grid=tuned_parameters,\n",
    "    scoring='roc_auc',\n",
    "    cv=kfold,\n",
    "    n_jobs=4,\n",
    "    refit=False,\n",
    "    verbose=5,\n",
    ")\n",
    "grid_search.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7246726331893844\n",
      "{'min_child_samples': 500}\n"
     ]
    }
   ],
   "source": [
    "# Best mean CV AUC, then the parameter value that produced it.\n",
    "print(grid_search.best_score_, grid_search.best_params_, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 4 candidates, totalling 12 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done   8 out of  12 | elapsed: 27.2min remaining: 13.6min\n",
      "[Parallel(n_jobs=4)]: Done  12 out of  12 | elapsed: 35.7min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=30, shuffle=True),\n",
       "       error_score='raise-deprecating',\n",
       "       estimator=LGBMClassifier(boosting_type='goss', class_weight=None, colsample_bytree=0.7,\n",
       "        importance_type='split', learning_rate=0.1, max_depth=12,\n",
       "        min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n",
       "        n_estimators=251, n_jobs=4, num_leaves=800, objective='binary',\n",
       "        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,\n",
       "        subsample=1.0, subsample_for_bin=200000, subsample_freq=0,\n",
       "        verbosity=5),\n",
       "       fit_params=None, iid='warn', n_jobs=4,\n",
       "       param_grid={'min_child_samples': range(100, 500, 100)},\n",
       "       pre_dispatch='2*n_jobs', refit=False, return_train_score='warn',\n",
       "       scoring='roc_auc', verbose=5)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Continue downwards: min_child_samples in (100, 200, 300, 400),\n",
    "# with every other LightGBM setting held fixed.\n",
    "params = dict(\n",
    "    boosting_type='goss',\n",
    "    objective='binary',\n",
    "    n_jobs=4,\n",
    "    learning_rate=0.1,\n",
    "    n_estimators=n_estimators_1,\n",
    "    num_leaves=800,\n",
    "    max_depth=12,\n",
    "    colsample_bytree=0.7,\n",
    "    verbosity=5,\n",
    ")\n",
    "lg = LGBMClassifier(silent=False, **params)\n",
    "\n",
    "min_child_samples_s = range(100, 500, 100)\n",
    "tuned_parameters = {'min_child_samples': min_child_samples_s}\n",
    "\n",
    "# refit=False: only the CV scores are needed, no final model is trained.\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=lg,\n",
    "    param_grid=tuned_parameters,\n",
    "    scoring='roc_auc',\n",
    "    cv=kfold,\n",
    "    n_jobs=4,\n",
    "    refit=False,\n",
    "    verbose=5,\n",
    ")\n",
    "grid_search.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7334577334203787\n",
      "{'min_child_samples': 100}\n"
     ]
    }
   ],
   "source": [
    "# Best mean CV AUC, then the parameter value that produced it.\n",
    "print(grid_search.best_score_, grid_search.best_params_, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 9 candidates, totalling 27 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed: 37.5min\n",
      "[Parallel(n_jobs=4)]: Done  27 out of  27 | elapsed: 90.0min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=30, shuffle=True),\n",
       "       error_score='raise-deprecating',\n",
       "       estimator=LGBMClassifier(boosting_type='goss', class_weight=None, colsample_bytree=0.7,\n",
       "        importance_type='split', learning_rate=0.1, max_depth=12,\n",
       "        min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,\n",
       "        n_estimators=251, n_jobs=4, num_leaves=800, objective='binary',\n",
       "        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,\n",
       "        subsample=1.0, subsample_for_bin=200000, subsample_freq=0,\n",
       "        verbosity=5),\n",
       "       fit_params=None, iid='warn', n_jobs=4,\n",
       "       param_grid={'min_child_samples': range(10, 100, 10)},\n",
       "       pre_dispatch='2*n_jobs', refit=False, return_train_score='warn',\n",
       "       scoring='roc_auc', verbose=5)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Finest grid: min_child_samples in 10, 20, ..., 90,\n",
    "# with every other LightGBM setting held fixed.\n",
    "params = dict(\n",
    "    boosting_type='goss',\n",
    "    objective='binary',\n",
    "    n_jobs=4,\n",
    "    learning_rate=0.1,\n",
    "    n_estimators=n_estimators_1,\n",
    "    num_leaves=800,\n",
    "    max_depth=12,\n",
    "    colsample_bytree=0.7,\n",
    "    verbosity=5,\n",
    ")\n",
    "lg = LGBMClassifier(silent=False, **params)\n",
    "\n",
    "min_child_samples_s = range(10, 100, 10)\n",
    "tuned_parameters = {'min_child_samples': min_child_samples_s}\n",
    "\n",
    "# refit=False: only the CV scores are needed, no final model is trained.\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=lg,\n",
    "    param_grid=tuned_parameters,\n",
    "    scoring='roc_auc',\n",
    "    cv=kfold,\n",
    "    n_jobs=4,\n",
    "    refit=False,\n",
    "    verbose=5,\n",
    ")\n",
    "grid_search.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7354017660696116\n",
      "{'min_child_samples': 40}\n"
     ]
    }
   ],
   "source": [
    "# Best mean CV AUC, then the parameter value that produced it.\n",
    "print(grid_search.best_score_, grid_search.best_params_, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\15067\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:125: FutureWarning: You are accessing a training score ('mean_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "C:\\Users\\15067\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:125: FutureWarning: You are accessing a training score ('std_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAD8CAYAAABpcuN4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xd8VHX2//HXSSWFnlADCZCEpoAQAWkqCtYFu7CK2GUVdUHXdb+7ui7u7k9dERuLvawNFV3FsiCCIAICAQkQIBCaCTWU0EIq5/fHXHTElCEkuTPJeT4eeWTmM7ecmUzyzi1zrqgqxhhjTFmC3C7AGGOMf7OgMMYYUy4LCmOMMeWyoDDGGFMuCwpjjDHlsqAwxhhTLgsKY4wx5fIpKETkQhHJEJFMEXmwlMcnicgK52u9iOQ64/EisswZTxeRMV7zzHWWeXy+Zs74jSKS4zV+a1U9WWOMMSdPKvrAnYgEA+uBIUA2sBQYqaprypj+buAMVb1ZRMKcdRSISDSwGuinqttFZC5wv6qmnjD/jUCKqo49tadmjDGmKoT4ME1vIFNVNwGIyFRgOFBqUAAjgb8CqGqh13g41bSrKyYmRhMSEqpj0cYYU2stW7Zsj6rGVjSdL0HRGsjyup8N9CltQhGJB9oBc7zG2gBfAInAH1R1u9csr4tICfAR8Hf9efPmShEZhGdLZpyqeq//VxISEkhNTS1vEmOMMScQka2+TOfLf/hSylhZ+6tGANNUteSnCVWzVLUbnqAYLSLNnYeuU9XTgYHO1yhn/DMgwZnna+DNUosSuV1EUkUkNScnx4enYYwxpjJ8CYpsoI3X/ThgexnTjgDeK+0BZ0siHU8ooKrbnO+HgHfx7OJCVfeqaoEz28tArzKW95KqpqhqSmxshVtOxhhjKsmXoFgKJIlIO+fg9Ahg+okTiUhHoDGwyGssTkQinNuNgf5AhoiEiEiMMx4KXIrnQDci0tJrscOAtZV5YsYYY6pGhccoVLVYRMYCM4Fg4DVVTReRCUCqqh4PjZHAVP3laVSdgYkionh2YT2pqqtEJAqY6YREMJ5dTC8789wjIsOAYmAfcOMpP0tjjDGVVuHpsYEgJSVF7WC2McacHBFZpqopFU1nn8w2xhhTLgsKY4wx5bKgMAEvN6+QD1OzKC455nYpxtRKvnzgzhi/dbigmNGvLSEt+wCFJce4rk+82yUZU+vYFoUJWPlFJdz2Ziqrtx+kTZMInp+TSX5RScUzGmNOigWFCUjFJce4+70fWLRpL09e3Y3HrujGjgP5TF3yo9ulGVPrWFCYgHPsmPLARyuZtWYXfxvWlcvPiKNfh6b0adeEyXM3crTQtiqMqUoWFCagqCoTPl/Dx8u3MX5IMqP7JQAgItw3tCM5hwp4+3uf+pwZY3xkQWECytNfb+CNhVu4uX877h6c+IvHerdrwsCkGKbM28iRgmKXKjSm9rGgMAHjte8288zsDVzVK46/XNIZkV83Nh43JJl9Rwp5Y+GWmi/QmFrKgsIEhGnLspnw+Rou6Nqcx644naCg0rrfQ8+2jTm3YywvfbuJg/lFNVylMbWTBYUfUlVy8wornrCOmJm+kz9+tJL+iU15ZsQZhASX/7YdP6QjB44W8dp3m2uoQmNqNwsKPzTp6w30fHQWr363mdrQtPFULMjcw93v/sDprRvy0qgU6oUGVzjP6XENGdqlOa/O32yBa0wVsKDwM2lZuUz+JpMmUeE8+vka/vTxKgqL62ZrihVZudz2n1TaxUTxxk1nEhXueyOBcUOSOVRQzCvzbavCmFNlQeFH8otKuO/DNJrVD2f2+LMZe24iU5dmcf0ri9l7uKDiBdQiGTsPcePrS4iJDuc/t/SmUWTYSc3fuWUDLunWktcXbGbfEduqMOZUWFD4kadmrSdz92Eev7IbDSNDuf+Cjjwzogdp2bkMe34B63YedLvEGpG1L49R
ry4mLDiIt2/pQ/MG9Sq1nHHnJ3G0qIQX522s4gqNqVssKPxE6pZ9vDx/E7/t05ZByT9fA3x4j9Z8cMdZFJUc48p/L+Sr9J0uVln9dh/M57pXFlNQfIy3bulD26aRlV5WYrP6DO/RmjcXbWH3ofyqK9KYOsaCwg/kFRZz/4dptG4Uwf9d3PlXj3dv04jpYwfQoVk0d7y9jMnfZNbKg9y5eYWMenUJew4X8MZNZ9KxRf1TXua95yVRVKJMmWtbFcZUlgWFH3hiRgZb9ubxr6u6E13GAdsWDevxwR1ncWm3VvxrZgbj3l9RqzqlHiko5qY3lrJ5zxFeGpXCGW0bV8lyE2KiuLJna95Z/CM7DhytkmUaU9dYULhs4cY9vLFwCzf2S+CsDk3LnbZeaDDPjujB/UOT+WTFdq596Xt2Hwz8XSoFxSWMeXsZaVm5PDuyBwOSYqp0+XcPTkJVmfxNZpUu15i6woLCRYcLinlg2kraxUTxxws7+TSPiDB2cBIvXN+LDbsOMez5BazKPlDNlVaf4pJj3PveCuZv2MPjV3bjwtNaVvk62jSJ5JqUNry/NIvs/XlVvnxjajsLChf944u1bM89ypNXdyMirOIPknm78LQWTBvTj+Ag4eoXF/JZ2vZqqrL6qCp/+ngVM9J38tClXbg6pU21rWvs4EREhOdm21aFMSfLgsIl89bn8N6SH7ltYHt6xTep1DK6tGrAp2P7c1qrhtz93g889VUGx44FxkFuVeUfX6zlw2XZ3DM4kVsGtKvW9bVsGMFve7dl2vJstuw5Uq3rMqa2saBwwYGjRfxx2kqSmkUzbkjyKS0rJjqcd27rw9W94nh2TiZ3vrOcvEL/b7H9/JxMXvluMzf2Szjl18BXd57bgdBg4dnZG2pkfcbUFhYULpjw2RpyDhcw8ZruPvUuqkh4SDBPXNWNv1zSma/W7OSqKYvYluu/Z/j8Z9EWJs5az+VntObhS7uU2i68OjSrX49RfeP5ZMU2MncfrpF1GlMbWFDUsFlrdvHR8mzuPKcD3eIaVdlyRYRbB7bn1RvPJGtfHsOf/45lW/dV2fKryic/bOPhT9M5v3MznriqW5ntwqvLmLM7UC80mKe/Xl+j6zUmkFlQ1KD9Rwr508er6NyyAXcPTqqWdZzbsRn/vasfUeEhjHxpMdOWZVfLeirj6zW7uO/DNPq2b8Lzv+1JaAXtwqtD0+hwbuyXwBerdtSZlijGnCoLihr08PR0DhwtZOLV3QkLqb6XPrFZfT69qz8pCY25/8M0/vnlWkpcPsi9aONe7nx3OV1bNeCV0WdWyS63yrp9UHuiw0J4epYdqzDGFxYUNeTLVTv4LG079wxOokurBtW+vkaRYbx5c29G9Y3npW83ceubSznk0hXfVmZ72oW3bRLJGzf1LvPT5zWlUWQYNw9ox4z0nazeFrifQTGmplhQ1IA9hwv4yyerOb11Q353TocaW29ocBCPXnYaj152Gt9u2MMV/17I1r01e2po5u5DjH5tCQ0jQnnrlt40iTq5duHV5ZaB7WgYEcqkWXaswpiKWFBUM1Xlz/9dxeH8YiZe073Cy3hWh1F943nr5t7sPlTA8MkLWLhxT42sN3t/HqNeXUJwUBBv39qHlg0jamS9vmhQL5TbB7Vn9rrd/PDjfrfLMcavWVBUs09XbGdm+i7GD00mufmpd0OtrH6JMXx6V39iosO54dUlvP391mpdX86hAq5/ZTFHCor5z829aRcTVa3rq4wb+yXQJCqMp2yrwphyWVBUo10H8/nr9HR6tm3EbQPbu10OCTFRfHxnPwYmxfCXT1bz8KerKSqp+susHjhaxA2vLWHnwXxev+nMGjkmUxlR4SGMObs98zfsYekW/zuV2Bh/YUFRTY73MSooLuHJq7sTXMOfFyhLg3qhvDL6TG4b2I7/LNrKja8vITev6i4VerSwhFveWErm7kO8OCql0u1JasqovgnE1g9n4lcZbpdijN/yKShE5EIRyRCR
TBF5sJTHJ4nICudrvYjkOuPxIrLMGU8XkTFe88x1lnl8vmbOeLiIvO+sa7GIJFTNU61ZHy7LZs663TxwQSfax0a7Xc4vBAcJf76kC/+6qhtLN+/nsskLquSTyoXFxxjz9jKW/7ifp689g7O9rtTnryLCgrnznA58v2kfCzNr5tiNMYGmwqAQkWBgMnAR0AUYKSJdvKdR1XGq2kNVewDPAR87D+0A+jnjfYAHRaSV16zXHZ9PVXc7Y7cA+1U1EZgEPH4Kz88V23OP8uhna+jTrgk39ktwu5wyXZ3Shndv68PhgmIun7yAuRm7K56pDCXHlHEfrGDe+hz+efnpXNKt6tuFV5eRvdvSsmE9Js5aXyuvHGjMqfJli6I3kKmqm1S1EJgKDC9n+pHAewCqWqiqBc54uI/rGw686dyeBpwnNdUMqAqoKn/8aCUlqvzrqu413qLiZKUkNOGTu/oT1ySSm99YyivzN530H0tV5S+frOKLlTv400WdGNG7bTVVWz3qhQZz17mJLNu6n3nrc9wuxxi/48sf7tZAltf9bGfsV0QkHmgHzPEaayMiK51lPK6q3hdOeN3Z7fSQVxj8tD5VLQYOAL+69JuI3C4iqSKSmpPjP7/c7yz+kfkb9vB/F3embdNIt8vxSVzjSKaNOYshXZrz9y/W8sePVlJQ7PtlVh+bsY73lmRx5zkduOPsmvucSFW6JqUNrRtF8JRtVRjzK74ERWn/Epf1mzQCmKaqP/2VUdUsVe0GJAKjRaS589B1qno6MND5GnUy61PVl1Q1RVVTYmP9Y1/4j3vz+OeXaxmYFMN1fQLrv+qo8BCmXNeLewYn8kFqNte/spg9hwsqnG/K3I28OG8T1/Vpyx8u6FgDlVaPsJAg7jkvkZXZB/h6beV3wRlTG/kSFNmA96XH4oCyLqc2Ame304mcLYl0PKGAqm5zvh8C3sWzi+sX6xOREKAh4PfnLh47pvxhWhrBIjx+Zbcaa51dlYKChPFDO/LsyDNYmX2A4c8vYO2OshvnvbN4K4/PWMdvurdiwvDTAvI5e7uiZxzxTSN5atb6gLkAlDE1wZegWAokiUg7EQnDEwbTT5xIRDoCjYFFXmNxIhLh3G4M9AcyRCRERGKc8VDgUmC1M9t0YLRz+ypgjgbAvoA3Fm5h8eZ9PPSbLrRq5D+fQK6MYd1b8eGYsyg+dowrpyxkZvrOX00zPW07f/lkNed2jOWpa/zn9N9TERocxL3nJbF2x8FSn7MxdVWFQeEcJxgLzATWAh+oarqITBCRYV6TjgSmnvBHvTOwWETSgHnAk6q6Cs+B7ZnOsYsVwDbgZWeeV4GmIpIJjAd+dTquv9mUc5gnZq7jvE7NuLpXnNvlVIlucY2YPnYASc2iueOtZUz+JvOnffffrNvN+PdXcGZ8E/59XS9X2oVXl+E9WtMhNopJX693veOuMf5CAuCf9QqlpKRoamqqK+suOaZc/cJCNuYcYda4QTRrUM+VOqpLflEJf/xoJZ+u2M6w7q24OiWO2/6TSofYaN67vS8N6oW6XWKV+yxtO3e/9wPPjOjB8B6lnrdhTK0gIstUNaWi6WrPv4IueXn+Jpb/mMuE4V1rXUiA59TRp6/twR8u6Mj0tO2MenUJrRpG8ObNvWtlSABccnpLOrWozzNfb6C4GlqcGBNoLChOwfpdh3jqq/Vc2LUFw7q3qniGACUi3HVuIi+N6sV5nZrx1q19iIkOd7usahMUJPz+/GQ27TnCJyvKOm/DmLrD3SvIBLCikmPc90Ea0fVC+PvlgX/Gjy+Gdm3B0K4t3C6jRlzQtTmntW7As7M3MLxHq1p1HMaYk2Xv/kqaMncjq7Yd4B+XnVar/7uuq0SE8UOS+XFfnl9dd9wYN1hQVEL69gM8O3sDw7q34qLTA6enkTk553ZsRo82jXhu9oaT+qS6MbWNBcVJKiz27HJqHBXGhOFd3S7HVCMR4b6hyWw/kM/7S7MqnsGYWsqC4iQ9O3sD63Ye
4rErTqdRpH9c/9lUnwGJMfROaMLzczLJL7KtClM3WVCchLSsXKbM28hVveI4r3PzimcwAU9EGD80md2HCqr98rHG+CsLCh/lF5Vw34dpNKsfzkOXdql4BlNr9G3flH4dmvLCvI3kFRa7XY4xNc6CwkdPzVpP5u7DPHZlNxpG1M4Pmpmy3Tc0mT2HC3lzoW1VmLrHgsIHqVv28fL8TYzs3TYgLu9pql6v+CacnRzLi99u5FB+kdvlGFOjLCgqkFdYzP0fptG6UQR/vqSz2+UYF40fkkxuXhFvLNjidinG1CgLigo8MSODLXvzeOKqbkSH2wfZ67LubRpxfufmvDx/EweO2laFqTssKMqxcOMe3li4hRv7JdCvQ4zb5Rg/MH5IMgfzi3l1/ia3SzGmxlhQlOFwQTEPTFtJQtNIHrgwcC/xaapWl1YNuPj0Fry2YAv7jxS6XY4xNcKCogz/+GIt23KP8uTV3YkMs11O5me/Pz+ZI4XFvPitbVWYusGCohTz1ufw3pIfuW1ge1ISmrhdjvEzyc3rM6x7K95cuIWcQwVul2NMtbOgOMGBo0X8cdpKEptFM35IstvlGD9173lJFBSX8MK8jW6XYky1s6A4wYTP1pBzuICJV3enXmiw2+UYP9U+Nporesbx9vdb2XUw3+1yjKlWFhReZq3ZxUfLs/nd2R3o3qaR2+UYP3fveUmUHFMmf5PpdinGVCsLCsf+I4X86eNVdGpRn3vOS3K7HBMA2jSJ5OqUNkxdksW23KNul2NMtbGgcDw8PZ3cvEImXtOdsBB7WYxvxg5OBOD5ORtcrsSY6mN/EYEvV+3gs7Tt3HNeEl1bNXS7HBNAWjeKYETvNnyYms2Pe/PcLseYalHng2LP4QL+8slqTm/dkN+d08HtckwAuuvcRIKDhGdtq8LUUnU6KFSVP/93FYfzi5l4TXdCg+v0y2EqqXmDelzfN56Pl2ezKeew2+UYU+Xq9F/GT1dsZ2b6LsYPTSa5eX23yzEB7HfndCA8JJhnZttWhal96nRQNGsQziXdWnLbwPZul2ICXEx0OKP7JTA9bTvrdx1yuxxjqlSdDop+HWKY/NueBAeJ26WYWuCOQe2JCgvh6a/Xu12KMVWqTgeFMVWpcVQYN/dP4MtVO0nffsDtcoypMhYUxlShWwa2p0G9ECbNsmMVpvawoDCmCjWMCOW2ge35eu0u0rJy3S7HmCphQWFMFbtpQDsaR4by1Cw7VmFqBwsKY6pYdHgId5zdgXnrc1i2dZ/b5RhzynwKChG5UEQyRCRTRB4s5fFJIrLC+VovIrnOeLyILHPG00VkTCnzTheR1V73HxGRbV7Lu/hUnqAxbrjhrHhiosOY+JVtVZjAV2FQiEgwMBm4COgCjBSRLt7TqOo4Ve2hqj2A54CPnYd2AP2c8T7AgyLSymvZVwClfZR10vHlqeqXlXlixrgpMiyE352TyMKNe5m/Icftcow5Jb5sUfQGMlV1k6oWAlOB4eVMPxJ4D0BVC1X1+LUiw73XJyLRwHjg75Up3Bh/d12ftrRtEsmEz9ZQVHLM7XKMqTRfgqI1kOV1P9sZ+xURiQfaAXO8xtqIyEpnGY+r6nbnoUeBiUBpLTfHishKEXlNRBr7UKMxfqdeaDAPXdqFDbsP89airW6XY0yl+RIUpX1sWcuYdgQwTVVLfppQNUtVuwGJwGgRaS4iPYBEVf1vKcuYAnQAeuDZdTWx1KJEbheRVBFJzcmxTXvjn87v3IxBybFM+no9ew8XVDyDMX7Il6DIBtp43Y8Dtpcx7Qic3U4ncrYk0oGBwFlALxHZAnwHJIvIXGe6XapaoqrHgJfx7PoqbXkvqWqKqqbExsb68DSMqXkiwsOXduFoYQlPfpXhdjnGVIovQbEUSBKRdiIShicMpp84kYh0BBoDi7zG4kQkwrndGOgPZKjqFFVtpaoJwABgvaqe40zX0muxlwOrMSaAJTaL5sZ+CUxdmsWqbGvtYQJPhUGhqsXA
WGAmsBb4QFXTRWSCiAzzmnQkMFVVvXdLdQYWi0gaMA94UlVXVbDKJ0RklXNc41xg3Ek8H2P80j3nJ9E0KoxHPkvnl78ixvg/qQ1v2pSUFE1NTXW7DGPK9cHSLB74aCVPX9uDy84o9XwQY2qUiCxT1ZSKprNPZhtTQ67qFUe3uIb888u1HC4odrscY3xmQWFMDQkKEh4Z1pXdhwqY/E2m2+UY4zMLCmNqUM+2jbmiZ2tenb+ZLXuOuF2OMT6xoDCmhj14YSdCg4W/f7HG7VKM8YkFhTE1rFmDetx9XhJfr93N3IzdbpdjTIUsKIxxwU39E2gXE8WEz9dQWGx9oIx/s6AwxgXhIcE8fGkXNuUc4c2FW9wux5hyWVAY45JzOzXj3I6xPDN7A7sP5btdjjFlsqAwxkUPXdqFguIS/jXD+kAZ/2VBYYyL2sdGc/OAdny4LJsVWblul2NMqSwojHHZ3YOTiK0fzl+np3PsWOC31DG1jwWFMS6LDg/hwQs7kZaVy8c/bHO7HGN+xYLCGD9w+RmtOaNtIx773zoO5Re5XY4xv2BBYYwfCAoSHvlNV/YcLuD5OdYHyvgXCwpj/ET3No24JiWO1xZsZmPOYbfLMeYnFhTG+JE/XNCJeiHBPPq59YEy/sOCwhg/Els/nHvPT2JuRg5z1u1yuxxjAAsKY/zODWcl0CE2igmfraGguMTtcoyxoDDG34SFBPHX33Rly948Xvtui9vlGGNBYYw/GpQcy/mdm/P8nA3sOmh9oIy7LCiM8VMPXdqZohLl8f+tc7sUU8dZUBjjp+KbRnHboHZ8/MM2lm3d73Y5pg6zoDDGj915TiLNG4TziPWBMi6yoDDGj0WFh/B/F3dm1bYDfLgsy+1yTB1lQWGMnxvWvRUp8Y15YkYGB45aHyhT8ywojPFzIsIjw7qyL6+QZ2dvcLscUwdZUBgTAE5r3ZARZ7blzYVb2LDrkNvlmDrGgsKYAHH/0GQiwoKZ8PkaVO3Atqk5FhTGBIim0eGMH5LM/A17mLXG+kCZmmNBYUwAub5vPMnNo3n0izXkF1kfKFMzLCiMCSChwZ4+UFn7jvLK/E1ul2PqCAsKYwJM/8QYLuzagsnfbGTHgaNul2PqAAsKYwLQny/pzDFV/t+X1gfKVD8LCmMCUJsmkdwxqD3T07azZPM+t8sxtZxPQSEiF4pIhohkisiDpTw+SURWOF/rRSTXGY8XkWXOeLqIjCll3ukistrrfhMRmSUiG5zvjU/lCRpTW/3unERaNazHI9PTKbE+UKYaVRgUIhIMTAYuAroAI0Wki/c0qjpOVXuoag/gOeBj56EdQD9nvA/woIi08lr2FcCJV5F/EJitqknAbOe+MeYEEWHB/N8lnVmz4yBTl/7odjmmFvNli6I3kKmqm1S1EJgKDC9n+pHAewCqWqiqBc54uPf6RCQaGA/8/YT5hwNvOrffBC7zoUZj6qRLTm9Jn3ZNeHJmBgfyrA+UqR6+BEVrwLttZbYz9isiEg+0A+Z4jbURkZXOMh5X1e3OQ48CE4G8ExbTXFV3ADjfm5WxrttFJFVEUnNycnx4GsbUPiLCX3/TlQNHi5j09Xq3yzG1lC9BIaWMlbVDdAQwTVV/+iSQqmapajcgERgtIs1FpAeQqKr/PemKf17uS6qaoqopsbGxlV2MMQGvS6sGXNcnnre+38q6nQfdLsfUQr4ERTbQxut+HLC9jGlH4Ox2OpGzJZEODATOAnqJyBbgOyBZROY6k+4SkZYAzvfdPtRoTJ02fkgy0eEh/G269YEyVc+XoFgKJIlIOxEJwxMG00+cSEQ6Ao2BRV5jcSIS4dxuDPQHMlR1iqq2UtUEYACwXlXPcWabDox2bo8GPq3MEzOmLmkcFcb9Q5NZtGkvM1bvdLscU8tUGBSqWgyMBWYCa4EPVDVdRCaIyDCvSUcCU/WX/850BhaLSBowD3hSVVdVsMrHgCEisgEY4tw3xlRgZO+2dGpRn79/sZajhdYH
ylQdqQ2bqSkpKZqamup2Gca47vtNexnx0vf8/vwkfn9+stvlGD8nIstUNaWi6eyT2cbUIn3bN+WSbi2ZMncj2ftPPKHQmMqxoDCmlvm/izsjgvWBMlXGgsKYWqZ1owh+d3YiX6zawcKNe9wux9QCFhTG1EJ3nN2e1o0imPDZGopLjrldjglwFhTG1EL1QoN56NLOrNt5iHeXWB8oc2osKIyppS7o2oJ+HZoy8av17D9S6HY5JoBZUBhTSx3vA3W4oJiJszLcLscEMAsKY2qxji3qM6pvPO8u/pH07QfcLscEKAsKY2q5cecn0zAi1PpAmUqzoDCmlmsYGcofLujEki37+HzlDrfLMQHIgsKYOuDaM9vQtVUD/vnlWvIKi90uxwQYCwpj6oDgIOGRYV3ZcSCfKXM3ul2OCTAWFMbUEWcmNGF4j1a8+O0msvZZHyjjOwsKY+qQBy/qRLAIj35uB7aN70LcLsAYU3NaNoxg7OBE/jUzg45/mUGjyFAaR4bRMDKUxs7tRpFhNI4MpVFkqHPbc7+h83hosP1/WddYUBhTx9w+qD0NI0LJ2p9H7pEi9ucVkptXxOY9R1iel0tuXiFFJWVvbUSHh/wUMD+Hied7o4hQGkf9MmAaRYRRv14IQUFSg8/SVCULCmPqmNDgIK7vG1/m46rKkcIS9h8p5MBRT5Dszysi1wmU/V7f9+cVkbUvj/15RRzML6KsvVlBgidIIkM9YeK15dI4KoyGzlizBuH0atvYQsXPWFAYY35BRIgODyE6PIQ2JzFfyTHlwFFPoBwPlrICZseBfNbuOMj+vCKOFv3ysq2DOzXj6RE9aFAvtGqfmKk0CwpjTJUIDhKaRIXRJCrspObLLyohN6+I3KOFfLdhD4/9bx2XTV7Ayzek0CE2upqqNSfDjkoZY1xVLzSYFg3r0alFA24d2J53bu3DgbwiLnt+AbPX7nK7PIMFhTHGz/Rp35Tpdw8gPiaSW/+TynOzN9ipvC6zoDDG+J3WjSL48I5+DO/eiomz1nPnO8s5UmCtR9xiQWGM8UsRYcFMurYHf764MzPTd3LFvxeyde8Rt8uqkywojDF+S0S4bVB73ry5NzsP5jPs+QXM35Djdll1jgWFMcbvDUyKZfrY/rTlsI5nAAAOXElEQVRoUI/Rry3h5W832XGLGmRBYYwJCPFNo/j4zn5c0LUF//hyLePeX0H+CZ/BMNXDgsIYEzCiwkP493U9uX9oMp+mbeeqFxayLfeo22XVehYUxpiAIiKMHZzEKzeksHVPHsOe+47Fm/a6XVatZkFhjAlI53Vuzn/v6k/DyFCue2Ux/1m0xY5bVBMLCmNMwEpsFs0nd/VnUHIsD3+azoMfraKg2I5bVDULCmNMQGtQL5RXbkhh7LmJvJ+axYiXvmfXwXy3y6pVLCiMMQEvKEi4/4KO/Pu6nmTsPMRvnvuO5T/ud7usWsOCwhhTa1x8eks+vrMf9UKDGfHi93ywNMvtkmoFn4JCRC4UkQwRyRSRB0t5fJKIrHC+1otIrjMeLyLLnPF0ERnjNc8MEUlzxl8QkWBn/BER2ea1vIur6skaY2q/Ti0aMH1sf3q3a8IDH63k4U9XU1RyzO2yAppUdJaA8wd8PTAEyAaWAiNVdU0Z098NnKGqN4tImLOOAhGJBlYD/VR1u4g0UNWDIiLANOBDVZ0qIo8Ah1X1SV+fREpKiqampvo6uTGmDiguOcbjM9bx8vzN9GnXhH9f15Om0eFul+VXRGSZqqZUNJ0vWxS9gUxV3aSqhcBUYHg5048E3gNQ1UJVLXDGw73Xp6oHnZshQBhg57UZY6pMSHAQf76kC09f24MVWbkMe34Bq7cdcLusgORLULQGvHf0ZTtjvyIi8UA7YI7XWBsRWeks43FV3e712ExgN3AIz1bFcWNFZKWIvCYijX19MsYYc6LLzmjNtDH9UFWunLKQT1dsc7ukgONLUJR2lfOy/vsfAUxT1Z9OZFbVLFXtBiQCo0Wk
uddjFwAt8WxtDHaGpwAdgB7ADmBiqUWJ3C4iqSKSmpNj3SSNMWU7Pa4h0+8eQPc2jbh36gr++eVaSo7ZTgxf+RIU2fCLa6zHAdvLmHYEzm6nEzlbEunAwBPG84HpOLuzVHWXqpao6jHgZTy7vkpb3kuqmqKqKbGxsT48DWNMXRYTHc47t/bhhrPieenbTdz4+hJy8wrdLisg+BIUS4EkEWnnHJwegecP+y+ISEegMbDIayxORCKc242B/kCGiESLSEtnPAS4GFjn3G/ptdjL8RwAN8aYUxYaHMSE4afx2BWns3jTPoY9v4CMnYfcLsvvVRgUqloMjAVmAmuBD1Q1XUQmiMgwr0lHAlP1l6dRdQYWi0gaMA94UlVXAVHAdOfYRRqe4xQvOPM8ISKrnMfOBcad2lM0xphfGtG7Le/d3pf8ohIu//cCZqze4XZJfq3C02MDgZ0ea4ypjF0H87njrWWsyMrl7sGJjDs/maCg0g7L1k5VeXqsMcbUSs0b1OP9O/pyTUocz83J5Pa3UjmUX+R2WX7HgsIYU6eFhwTz+JXdmDC8K3Mzcrhs8gI25hx2uyy/YkFhjKnzRIQbzkrg7Vv7sD+viMueX8CcdbvcLstvWFAYY4yjb/umfHb3ANo2jeSWN1OZ/E2mXQwJCwpjjPmF1o0imDamH8O6t+JfMzO4693lHCkodrssV1lQGGPMCSLCgnn62h78+eLOzFi9kyunLCRzd909bmFBYYwxpRARbhvUnjdu6s3Og/lc/Mx8Jn+TWSdblltQGGNMOQYlxzJr3NkM6dKcf83MYHgd7EJrQWGMMRWIrR/O5Ot68sL1vcg5XMDwyQt4fMY68otKKp65FrCgMMYYH114Wgu+Hnc2V/ZszZS5G7n4mfks3bLP7bKqnQWFMcachIaRoTxxVXfevqUPhSXHuPqFRTz86WoO1+IzoywojDGmEgYkxTDz94O4qX8Cb32/lQsmfcvcjN1ul1UtLCiMMaaSosJD+OtvujJtTD8iwoK58fWljP9gBfuP1K7rXFhQGGPMKeoV35gv7hnA3YMTmb5iO0MmzePLVTtqzae6LSiMMaYKhIcEc9/QjkwfO4CWDSO4853ljHl7GbsP5rtd2imzoDDGmCrUpVUD/ntnPx68qBNzM3I4/6l5fJCaFdBbFxYUxhhTxUKCgxhzdgf+d+9AOrVowAPTVnLDa0vI2pfndmmVYkFhjDHVpH1sNFNv78ujl53G8q37ueDpb3l9wWZKjgXW1oUFhTHGVKOgIGFU33i+Gn82vds14W+freGaFxeRufuQ26X5zILCGGNqQOtGEbx+45lMurY7G3MOc/Ez3/H8nA0B0WTQgsIYY2qIiHD5GXF8Pf5shnRtzpNfrWdYADQZtKAwxpgaFhMdzuTf9uTFUb3Y6zQZfOx//ttk0ILCGGNcckHXFswadzZX9YzjhXmeJoNLNvtfk0ELCmOMcVHDyFAev6rbT00Gr3lxEQ994l9NBi0ojDHGDwxIiuGrcYO4uX873l68laFPzeMbP2kyaEFhjDF+IjIshId/04VpY/oRGR7CTa8vZfz77jcZtKAwxhg/c7zJ4D2DE5me5mky+MVK95oMWlAYY4wfCg8JZrxXk8G73l3OHW+502TQgsIYY/zY8SaDf7qoE/PW53DeU/P4YGnNNhm0oDDGGD8XEhzEHU6Twc4tG/DARysZ9WrNNRm0oDDGmADRPjaaqbf15e+XncaKrFyGTvqWz9K2V/t6LSiMMSaABAUJ1/eN56txg+ifGEO7mKhqX2dIta/BGGNMlWvVKIJXRqfUyLp82qIQkQtFJENEMkXkwVIenyQiK5yv9SKS64zHi8gyZzxdRMZ4zTNDRNKc8RdEJNgZbyIis0Rkg/O9cVU9WWOMMSevwqBw/oBPBi4CugAjRaSL9zSqOk5Ve6hqD+A54GPnoR1AP2e8D/CgiLRyHrtGVbsDpwGxwNXO+IPAbFVNAmY7940xxrjEly2K3kCmqm5S
1UJgKjC8nOlHAu8BqGqhqhY44+He61PVg87NECAMOH6u13DgTef2m8BlPtRojDGmmvgSFK2BLK/72c7Yr4hIPNAOmOM11kZEVjrLeFxVt3s9NhPYDRwCpjnDzVV1B4DzvZnPz8YYY0yV8yUopJSxsj7pMQKYpqo/NVVX1SxV7QYkAqNFpLnXYxcALfFsbQz2uWpARG4XkVQRSc3JyTmZWY0xxpwEX4IiG2jjdT8OKOvE3RE4u51O5GxJpAMDTxjPB6bz8+6sXSLSEsD5Xmr7RFV9SVVTVDUlNjbWh6dhjDGmMnwJiqVAkoi0E5EwPGEw/cSJRKQj0BhY5DUWJyIRzu3GQH8gQ0SivcIgBLgYWOfMNh0Y7dweDXxamSdmjDGmalT4OQpVLRaRscBMIBh4TVXTRWQCkKqqx0NjJDBVf9mApDMwUUQUzy6sJ1V1lbP7abqIhDvLnAO84MzzGPCBiNwC/MjPZ0MZY4xxgbjVtrYqiUgOsLWSs8cAe6qwnKpidZ0cq+vk+WttVtfJOZW64lW1wn33tSIoToWIpKpqzXy88SRYXSfH6jp5/lqb1XVyaqIu6/VkjDGmXBYUxhhjymVBAS+5XUAZrK6TY3WdPH+tzeo6OdVeV50/RmGMMaZ8tkVhjDGmXHUqKETkNRHZLSKrvcZcb2vu9MP6RkTWOm3X7/WH2kSknogs8WoH/zdnvJ2ILHbqet/5IGaNE5FgEflBRD73l7pEZIuIrHJa66c6Y/7wHmskItNEZJ3zPjvL7bpEpKPX5QlWiMhBEfm923U5tY1z3vOrReQ953fBH95f9zo1pYvI752xan+96lRQAG8AF54w5g9tzYuB+1S1M9AXuMtp5e52bQXAYKcdfA/gQhHpCzwOTHLq2g/cUsN1HXcvsNbrvr/Uda7Tdv/4KYtu/xwBngFmqGonoDue183VulQ1w+vyBL2APOC/btclIq2Be4AUVT0Nz4eCR+Dy+0tETgNuw9PRuztwqYgkUROvl6rWqS8gAVjtdT8DaOncbglk+EGNnwJD/Kk2IBJYjue6InuAEGf8LGCmC/XEOb8Ug4HP8Xzy3x/q2gLEnDDm6s8RaABsxjkm6S91nVDLUGCBP9TFzx2zm+DpXvE5cIHb7y88XSpe8br/EPBATbxedW2LojR+1dZcRBKAM4DF+EFtzu6dFXiaM84CNgK5qlrsTFJm2/lq9jSeX5Jjzv2mflKXAl+J58qOtztjbv8c2wM5wOvOrrpXRCTKD+ry5t1Q1NW6VHUb8CSeFkI7gAPAMtx/f60GBolIUxGJxNMjrw018HpZUPgREYkGPgJ+rz9f2MlVqlqinl0DcXg2eTuXNllN1iQilwK7VXWZ93Apk7pxSl9/Ve2J54qQd4nIIBdqOFEI0BOYoqpnAEfwoytHOvv6hwEful0L/NTAdDiea+u0AqLw/DxPVKPvL1Vdi2f31yxgBpCGZ7d1tbOg8LGteXUTkVA8IfGOqh6/lKxf1AagqrnAXDzHUBqJp+svlN92vrr0B4aJyBY8V1wcjGcLw+26UOfCXKq6G8/+9t64/3PMBrJVdbFzfxqe4HC7ruMuApar6i7nvtt1nQ9sVtUcVS3Cc2nnfvjH++tVVe2pqoOAfcAGauD1sqDwg7bmIiLAq8BaVX3KX2oTkVgRaeTcjsDzC7QW+Aa4yq26VPVPqhqnqgl4dlnMUdXr3K5LRKJEpP7x23j2u6/G5Z+jqu4EssRzKQCA84A1btfl5afLJzvcrutHoK+IRDq/m8dfL1ffXwAi0sz53ha4As/rVv2vV00ejHH7y3lRdwBFeP7LugXPvu3ZeJJ5NtDEhboG4NmMXQmscL4udrs2oBvwg1PXauBhZ7w9sATIxLO7INzFn+k5wOf+UJez/jTnKx34szPuD++xHkCq87P8BM+1Y/yhrkhgL9DQa8wf6vobnmvkrAbewnMVTtff98B8PKGVBpxXU6+XfTLbGGNMuWzXkzHGmHJZUBhjjCmXBYUx
xphyWVAYY4wplwWFMcaYcllQGGOMKZcFhTHGmHJZUBhjjCnX/wfBDFUYRG0IbAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# plot CV误差曲线\n",
    "test_means = grid_search.cv_results_[ 'mean_test_score' ]\n",
    "test_stds = grid_search.cv_results_[ 'std_test_score' ]\n",
    "train_means = grid_search.cv_results_[ 'mean_train_score' ]\n",
    "train_stds = grid_search.cv_results_[ 'std_train_score' ]\n",
    "\n",
    "x_axis = min_child_samples_s\n",
    "\n",
    "plt.plot(x_axis, test_means)\n",
    "#plt.errorbar(x_axis, -test_scores, yerr=test_stds ,label = ' Test')\n",
    "#plt.errorbar(x_axis, -train_scores, yerr=train_stds,label =  +' Train')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. 列采样参数 sub_feature/feature_fraction/colsample_bytree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  12 out of  15 | elapsed: 36.1min remaining:  9.0min\n",
      "[Parallel(n_jobs=4)]: Done  15 out of  15 | elapsed: 45.0min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=30, shuffle=True),\n",
       "       error_score='raise-deprecating',\n",
       "       estimator=LGBMClassifier(boosting_type='goss', class_weight=None, colsample_bytree=1.0,\n",
       "        importance_type='split', learning_rate=0.1, max_depth=12,\n",
       "        min_child_samples=40, min_child_weight=0.001, min_split_gain=0.0,\n",
       "        n_estimators=251, n_jobs=4, num_leaves=800, objective='binary',\n",
       "        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,\n",
       "        subsample=1.0, subsample_for_bin=200000, subsample_freq=0,\n",
       "        verbosity=5),\n",
       "       fit_params=None, iid='warn', n_jobs=4,\n",
       "       param_grid={'colsample_bytree': [0.5, 0.6, 0.7, 0.8, 0.9]},\n",
       "       pre_dispatch='2*n_jobs', refit=False, return_train_score='warn',\n",
       "       scoring='roc_auc', verbose=5)"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "params = {'boosting_type':'goss',\n",
    "         'objective':'binary',\n",
    "         'n_jobs':4,\n",
    "         'learning_rate':0.1,\n",
    "          'n_estimators':n_estimators_1,\n",
    "         'num_leaves': 800,\n",
    "         'max_depth':12,\n",
    "        # 'colsample_bytree':0.7,\n",
    "         'min_child_samples':40,\n",
    "         'verbosity':5}\n",
    "\n",
    "lg = LGBMClassifier(silent=False, **params)\n",
    "colsample_bytree_s = [i/10.0 for i in range(5,10)]\n",
    "tuned_parameters = dict(colsample_bytree = colsample_bytree_s)\n",
    "grid_search = GridSearchCV(lg,n_jobs=4, param_grid=tuned_parameters, cv=kfold, scoring='roc_auc', verbose=5, refit=False)\n",
    "grid_search.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7354017660696116\n",
      "{'colsample_bytree': 0.7}\n"
     ]
    }
   ],
   "source": [
    "print(grid_search.best_score_)\n",
    "print(grid_search.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\15067\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:125: FutureWarning: You are accessing a training score ('mean_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "C:\\Users\\15067\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:125: FutureWarning: You are accessing a training score ('std_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAD8CAYAAABdCyJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl8VfWd//HXJwmEJSwBsiFg2EFAQa9oVVxBUq1Axw0cO+i4/4p2dOrWdn7Tn50Zqc7UVrSjFodh2qpYOhVaBQSKuC9BRfYdASEkiOwESPL5/XHP1QtcsnCT3Jvk/Xw8zuOe+z3fc87nngQ+Od9zz+eYuyMiInKyUhIdgIiINGxKJCIiEhclEhERiYsSiYiIxEWJRERE4qJEIiIicVEiERGRuCiRiIhIXJRIREQkLmmJDqA+dOrUyfPz8xMdhohIg7Jo0aId7p5VVb8mkUjy8/MpLCxMdBgiIg2KmX1enX4a2hIRkbgokYiISFyUSEREJC5KJCIiEhclEhERiYsSiYiIxCWuRGJmHcxsrpmtCV4zY/S5xMw+jZpKzWxMsOx5M1tsZp+Z2XQzywjaLzSzj82szMyuOWZ75VHbmhlP/CIiEr94z0geAua7e29gfvD+KO6+wN0Hu/tg4FLgAPB6sPhedz/D3U8HNgETgvZNwE3ACzH2eTCyPXcfFWf8laqocP711eVs+vJAXe5GRKRBizeRjAamBvNTgTFV9L8GmOXuBwDcfQ+AmRnQEvCgfaO7fwZUxBlfXDZ+uZ9pH23myklv8fqyokSGIiKStOJNJDnuvg0geM2uov9Y4MXoBjObAhQB/YBJ1dhnCzMrNLP3I0NkdaVHVgav3jOM/I6tuf23i3j0tRUcKU9obhMRSTpVJhIzm2dmS2NMo2uyIzPLAwYBc6Lb3f1moDOwAri+Gpvq5u4h4Abgl2bW8wT7uz1IOIUlJSU1CfUoXTu0Yvpd3+J7557Ks2+u54bfvM/2PaUnvT0RkcamykTi7sPdfWCMaQawPUgQkURRXMmmrgP+5O5HYuyjHJgGXF2NeLYGr+uBN4AhJ+j3nLuH3D2UlVVlzbFKpael8rMxA/nV2MEs27qHK598i3fW7ohrmyIijUW8Q1szgfHB/HhgRiV9xxE1rGVhvSLzwFXAysp2ZmaZZpYezHcCzgeWn3T0NTR68CnMnHA+7Vs153vPf8Ck+WuoqPD62r2ISFKKN5FMBEaY2RpgRPAeMwuZ2eRIJzPLB7oCC6PWNWCqmS0BlgB5wCNB/7PNbAtwLfCsmS0L1ukPFJrZYmABMNHd6y2RAPTKbsOM75/PqDM68x9zV/P3Uz/iq/2H6zMEEZGkYu6N/y/qUCjktV1G3t35/QebeOTPy+mU0Zyn/vZMzux23G00IiINlpktCq5JV0p3tp8kM+PGc0/lj3edR0qKcf2z7zHlnQ00hcQsIhJNiSROg7q049W7h3FRn2z+35+XM+GFT9hbetz3CUREGi0lklrQrlUzfvN3Z/Hwt/sxe1kRo556h5VFexIdlohIvVAiqSVmxh0X9eSFW89h/6Eyxjz9DtMXbUl0WCIidU6JpJad06Mjr94zjCFdM/nhHxbz4PTPKD1SnuiwRETqjBJJHchqk87vbj2HCZf0YlrhZr7763fZsGN/osMSEakTSiR1JDXF+OHIvky56Wy27T7IqElvM3vptkSHJSJS65RI6tgl/bL5y90X0CM7gzt/9zE/+8tyFX4UkUZFiaQedMlsxR/u+BY3nZfP829vYOxz77Nt98FEhyUiUiuUSOpJ87QUfjpqAE/dMISV2/Zw5ZNv8+bqk69KLCKSLJRI6tl3Tu/MzLsvICsjnfFTPuSJuaspV+FHEWnAlEgSoGdWBq98/3y+O+QUfjV/DTdN+ZAv9x1KdFgiIidFiSRBWjZP5T+uPYOJfzOIDzbs5Mon36Zw485EhyUiUmNKJAlkZowd
2o3/ves80pulMPa595n81noVfhSRBkWJJAkMPKUdf777Ai7rn82/vLqCO3+3iD0q/CgiDYQSSZJo26IZz9x4Fj+5sj/zVxRz1aS3WbZ1d6LDEhGpkhJJEjEzbh3Wg5duP5dDRyr47q/fZdpHmzTUJSJJLa5EYmYdzGyuma0JXo97RKCZXWJmn0ZNpWY2Jlj2vJktNrPPzGy6mWUE7feZ2fKgfb6ZnRq1vfHB/taY2fhj99cYhPI78Oo9F3BO9w48+Mcl/PAPn3HwsAo/ikhyiveM5CFgvrv3BuYH74/i7gvcfbC7DwYuBQ4ArweL73X3M9z9dGATMCFo/wQIBe3TgccgnLiAfwbOAYYC/xwreTUGHTPS+e+bh/KDy3rzv59sYczT77CuZF+iwxIROU68iWQ0MDWYnwqMqaL/NcAsdz8A4O57AMzMgJaAB+0LIn2A94EuwfxIYK6773T3r4C5QEGcnyFppaYY947ow9Sbh1Ky7xCjJr3NXz7bmuiwRESOEm8iyXH3bQDBa3YV/ccCL0Y3mNkUoAjoB0yKsc4twKxg/hRgc9SyLUHbcczsdjMrNLPCkpKGXYrkwj5ZvHrPBfTNbcOEFz7hpzOXcbhMhR9FJDlUmUjMbJ6ZLY0xja7JjswsDxgEzIlud/ebgc7ACuD6Y9a5EQgBj0eaYmw65pVod3/O3UPuHsrKyqpJqEkpr11Lpt3xLW65oDv//e5Grnv2Pb7YpcKPIpJ4VSYSdx/u7gNjTDOA7UGCiCSK4ko2dR3wJ3c/7gYJdy8HpgFXR9rMbDjwY2CUu0fqh2wBukat2gVoMmM9zVJT+KfvnMZ//u2ZrCvex5VPvsWCVZUdchGRuhfv0NZMIPLNqfHAjEr6jiNqWMvCekXmgauAlcH7IcCzhJNI9P+Uc4DLzSwzuMh+Ocec4TQF3x6Ux8y7LyC3bQtunvIR/z5nlQo/ikjCxJtIJgIjzGwNMCJ4j5mFzGxypJOZ5RM+k1gYta4BU81sCbAEyAMeCZY9DmQAfwi+MjwTwN13Aj8DPgqmR4K2Jqd7p9a88v3zuT7UlacWrOV7z39AyV4VfhSR+mdN4Wa3UCjkhYWFiQ6jzrxcuJl/emUp7Vo2Y9K4IZzTo2OiQxKRRsDMFrl7qKp+urO9Ebgu1JVXvn8+rdPTuGHyBzyzcJ3uhheReqNE0kj0z2vLzAnnUzAgl4mzVnLb/yxi9wEVfhSRuqdE0oi0adGMp24Ywj9fdRoLVxfznafeYskWFX4UkbqlRNLImBk3n9+daXd8i/Jy5+r/fJffvf+5hrpEpM4okTRSZ3bL5C/3DOPcnh35yStLuXfap+w/VJbosESkEVIiacQ6tG7Of990Nv84og8zFm9l9NPvsLZ4b6LDEpFGRomkkUtJMe6+rDe/u+Ucdh04zKin3mHGp18kOiwRaUSUSJqI83t14tV7hjGgc1t+8NKn/OSVJRwq0zNORCR+SiRNSE7bFrxw27nccWEPfvf+Jq595j027zxQ9YoiIpVQImlimqWm8PAV/Xnue2exYcd+rnzyLeYt357osESkAVMiaaIuH5DLq3cPo2uHVtz6P4VMnLWSsnI940REak6JpAnr1rEVf7zrPMYN7cYzC9dxw+QPKN5TmuiwRKSBUSJp4lo0S+XRvxnEE9efwZItu7niybd5d92ORIclIg2IEokA8N0hXZgx4XzatUzjxskf8PSCtVToGSciUg1KJPK1PjltmDnhAq48vTOPz1nFLVM/YteBw4kOS0SSnBKJHKV1ehpPjh3Mz0YP4O21O7jyybf5dPOuRIclIkksrkRiZh3MbK6ZrQleM2P0uSR4ymFkKjWzMcGy581ssZl9ZmbTzSwjaL/PzJYH7fPN7NSo7ZVHbWtmPPFLbGbG976Vz/Q7zwPg2mfeZeq7G1X4UURiiusJiWb2GLDT3Sea2UNAprs/WEn/DsBaoIu7HzCztu6+J1j2C6A42NYlwAdBn7uAi939+qDfPnfPqEmc
jf0JiXVp14HD3PfyYv66spjvnJ7HxKtPJyM9LdFhiUg9qK8nJI4GpgbzU4ExVfS/Bpjl7gcAopKIAS0BD9oXRPoA7wNd4oxTTlL7Vs2Z/HchHijoy2tLtjHqqbdZVaTCjyLyjXgTSY67bwMIXrOr6D8WeDG6wcymAEVAP2BSjHVuAWZFvW9hZoVm9n5kiCwWM7s96FdYUlJSjY8iJ5KSYvyfi3vx+1vPZc/BMkY//Tb/+/GWRIclIkmiyqEtM5sH5MZY9GNgqru3j+r7lbsfd50kWJYHfAZ0dvcjxyxLJZxEPnL3KVHtNwITgIvc/VDQ1tndt5pZD+CvwGXuvq6yz6ChrdpTvKeUu1/8hA827GTc0K7881UDaNEsNdFhiUgdqLWhLXcf7u4DY0wzgO1BgogkiuJKNnUd8Kdjk0iwj3JgGnB11AcYTjhZjYokkaDv1uB1PfAGMKSqzyC1J7ttC35/6zn8n4t78uKHm7n6P9/l8y/3JzosEUmgeIe2ZgLjg/nxwIxK+o4jaljLwnpF5oGrgJXB+yHAs4STSHHUOplmlh7MdwLOB5bH+RmkhtJSU3igoB/Pjw+x5auDfGfS28xZVpTosEQkQeJNJBOBEWa2BhgRvMfMQmY2OdLJzPKBrsDCqHUNmGpmS4AlQB7wSLDscSAD+MMxX/PtDxSa2WJgATDR3ZVIEuSy/jn85e4L6N6pNXf8dhH/+upyjqjwo0iTE9fXfxsKXSOpW4fKyvmXv6zgt+9/TujUTH5945lkt2mR6LBEJE719fVfEdLTUvnZmIH8auxglm7dzY/+d2miQxKReqREIrVm9OBTuPvS3sxbsZ0PN+xMdDgiUk+USKRW/f353clpm86js1aopIpIE6FEIrWqZfNU7hvRh0827WL2Un2TS6QpUCKRWnf1mV3onZ3BY3NW6VtcIk2AEonUurTUFB76dj827NjPSx9uSnQ4IlLHlEikTlzaL5tzunfgl/PWsO9QWaLDEZE6pEQidcLMePiK/ny5/zDPLay0FJqINHBKJFJnBndtz5Wn5/GbtzZQvKc00eGISB1RIpE6df/lfTlSXsET89YkOhQRqSNKJFKn8ju15sZzT+Xlws2sLdYDsUQaIyUSqXN3X9qLls1S+fnsVYkORUTqgBKJ1LmOGenceVEP5i7fzkcbVTpFpLFRIpF6ccsFPchpm86/vabSKSKNjRKJ1IuWzVO5d7hKp4g0RkokUm+uOUulU0Qao7gTiZl1MLO5ZrYmeM2M0eeS4EmHkanUzMYEy543s8Vm9pmZTTezjKD9TjNbEvR/28xOi9rew2a21sxWmdnIeD+D1I+01BQeLFDpFJHGpjbOSB4C5rt7b2B+8P4o7r7A3Qe7+2DgUuAA8Hqw+F53P8PdTwc2AROC9hfcfVCwzmPALwCChDIWGAAUAL82s9Ra+BxSDy7rn83Q7h341XyVThFpLGojkYwGpgbzU4ExVfS/Bpjl7gcA3H0PgJkZ0BLw6PZA60h7sL+X3P2Qu28A1gJDa+FzSD0wM350RX927DvMc2+uT3Q4IlILaiOR5Lj7NoDgNbuK/mOBF6MbzGwKUAT0AyZFtX/fzNYRPiO5J2g+BdgctfqWoO0oZna7mRWaWWFJSUnNPpHUqcFd23PloDwmv7VepVNEGoFqJRIzm2dmS2NMo2uyMzPLAwYBc6Lb3f1moDOwArg+qv1pd+8JPAj8JLKZGJs+7vuk7v6cu4fcPZSVlVWTMKUe3D+yL4fLKvjlfJVOEWnoqpVI3H24uw+MMc0AtgcJIpIoiivZ1HXAn9z9SIx9lAPTgKtjrPcS3wyZbQG6Ri3rAmytzueQ5JHfqTV/e043pn20mbXF+xIdjojEoTaGtmYC44P58cCMSvqOI2pYy8J6ReaBq4CVwfveUetdCUT+dJ0JjDWzdDPrDvQGPqyFzyH17O7LetOyWSqPzV6Z6FBEJA61kUgmAiPMbA0wIniPmYXMbHKkk5nlEz6TWBi1rgFT
zWwJsATIAx4Jlk0ws2Vm9ilwH0GycvdlwMvAcmA28P3gbEYamE4Z6dxxYQ9eX76dQpVOEWmwrCmUqwiFQl5YWJjoMCSGA4fLuPjxN+iS2ZI/3nUe4RNTEUkGZrbI3UNV9dOd7ZJQrZqncd+IPny8aRdzlql0ikhDpEQiCXfNWV3olZ3Bz2erdIpIQ6REIgmXlprCQ5HSKR9trnoFEUkqSiSSFC7rn83Q/A78at5qlU4RaWCUSCQpmBkPX9GPHfsO8xuVThFpUJRIJGkM6ZbJlYPy+M1b6yneq9IpIg2FEokklUjplF/NU+kUkYZCiUSSSqR0yksqnSLSYCiRSNK5+7LetEhLUekUkQZCiUSSTqeMdO68qKdKp4g0EEokkpRuGdad7Dbp/NtrK2gKZXxEGjIlEklKrZqnce/XpVO2JzocEamEEokkrWuD0imPzV6p0ikiSUyJRJJWWmoKDxb0Y/2O/UxT6RSRpKVEIklteFA65ZcqnSKStJRIJKmZGQ+pdIpIUosrkZhZBzOba2ZrgtfMGH0uMbNPo6ZSMxsTLHvezBab2WdmNt3MMoL2O81sSdD/bTM7LWjPN7ODUdt6Jp74pWE4s1smVwzKVekUkSQV7xnJQ8B8d+8NzA/eH8XdF7j7YHcfDFwKHABeDxbf6+5nuPvpwCZgQtD+grsPCtZ5DPhF1CbXRbbn7nfGGb80EPeP7KfSKSJJKt5EMhqYGsxPBcZU0f8aYJa7HwBw9z0AFn6+akvAo9sDrSPt0nR179SaG4LSKetKVDpFJJnEm0hy3H0bQPCaXUX/scCL0Q1mNgUoAvoBk6Lav29m6wifkdwTtUp3M/vEzBaa2bAT7cjMbjezQjMrLCkpqdGHkuR0j0qniCSlKhOJmc0zs6UxptE12ZGZ5QGDgDnR7e5+M9AZWAFcH9X+tLv3BB4EfhI0bwO6ufsQ4D7gBTNrG2t/7v6cu4fcPZSVlVWTUCVJdcpI546LejJnmUqniCSTKhOJuw9394ExphnA9iBBRBJFcSWbug74k7sfibGPcmAacHWM9V4iGDJz90Pu/mUwvwhYB/Sp6jNI43HrsO5ktUnn0VkrVTpFJEnEO7Q1ExgfzI8HZlTSdxxRw1oW1isyD1wFrAze945a70pgTdCeZWapwXwPoDeg74Q2Ia2ap3Hv8D4s+vwrlU4RSRLxJpKJwAgzWwOMCN5jZiEzmxzpZGb5QFdgYdS6Bkw1syXAEiAPeCRYNsHMlpnZp4SHsCLJ6kLgMzNbDEwH7nR3jXE0MdeFutAzq7VKp4gkCWsKwwOhUMgLCwsTHYbUornLt3Pb/xTyL2MGcuO5pyY6HJFGycwWuXuoqn66s10apOH9szk7P5NfzlvDfpVOEUkoJRJpkMyMh6/oz459h/jNW7pMJpJISiTSYJ3ZLZNvD8zluTdVOkUkkZRIpEG7f2RfDpdV8OR8lU4RSRQlEmnQemRlMG5oN178UKVTRBJFiUQavEjplMdnr0p0KCJNkhKJNHhZbcKlU2YvK2LR57qtSKS+KZFIo/B16ZTXVDpFpL4pkUijECmdUvj5V7y+XKVTROqTEok0GpHSKT+fvZIylU4RqTdKJNJopKWm8GBBP9aX7Gda4eZEhyPSZCiRSKMy4rQczs7P5Im5Kp0iUl+USKRRMTMe+na4dMrktzYkOhyRJkGJRBqds04Nl0559s11lOw9lOhwRBo9JRJplO4f2ZdDZRX8av7qRIci0ugpkUij1CMrgxtUOkWkXsSdSMysg5nNNbM1wWtmjD6XmNmnUVOpmY0Jlj1vZovN7DMzm25mGcese42ZuZmFotoeNrO1ZrbKzEbG+xmkcVLpFJH6URtnJA8B8929NzA/eH8Ud1/g7oPdfTBwKXAAeD1YfK+7n+HupwObgAmR9cysDXAP8EFU22nAWGAAUAD8OvIcd5FoWW3Suf3CSOmUrxIdjkijVRuJZDQwNZifCoypov81wCx3PwDg7nsAzMyAlkB0fYuf
AY8B0Q+bGA285O6H3H0DsBYYGu+HkMbpm9IpK1Q6RaSO1EYiyXH3bQDBa3YV/ccCL0Y3mNkUoAjoB0wK2oYAXd39L8esfwoQfbfZlqBN5Dit09P4h+G9Kfz8K+aqdIpInahWIjGzeWa2NMY0uiY7M7M8YBAwJ7rd3W8GOgMrgOvNLAV4AvjHWJuJ0Xbcn5pmdruZFZpZYUlJSU3ClEbm+lBXemS1ZqJKp4jUiWolEncf7u4DY0wzgO1BgogkiuJKNnUd8Cd3PxJjH+XANOBqoA0wEHjDzDYC5wIzgwvuW4CuUat2AbbG2N5z7h5y91BWVlZ1PqY0UiqdIlK3amNoayYwPpgfD8yopO84ooa1LKxXZB64Cljp7rvdvZO757t7PvA+MMrdC4P9jTWzdDPrDvQGPqyFzyGN2OWn5RA6NZNfzlPpFJHaVhuJZCIwwszWACOC95hZyMwmRzqZWT7hM4mFUesaMNXMlgBLgDzgkcp25u7LgJeB5cBs4PvB2YzICZkZD1/Rj5K9Kp0iUtusKXyTJRQKeWFhYaLDkCRw528X8daaEt64/xKy2qQnOhyRpGZmi9w9VFU/3dkuTcoDBX0pLavgyflrEh2KSKOhRCJNSo+sDMYN7cqLH25ivUqniNQKJRJpcn5wWR+ap6Xw+ByVThGpDUok0uSES6f0YNZSlU4RqQ1KJNIk3TasB50y0pk4S6VTROKlRCJNUuv0NO4d0ZuPNqp0iki8lEikyYqUTvm5SqeIxEWJRJqsSOmUdSX7eblwS6LDEWmwlEikSbv8tBzOOjWTJ+at5sBhlU4RORlKJNKkmRk/UukUkbgokUiTd9apHRg5IIdnF65jx75DiQ5HpMFRIhEBHijop9IpIidJiUQE6BmUTnnhg01s2LE/0eGINChKJCKBb0qnrEx0KCINihKJSCBSOuW1JUV8vEmlU0SqS4lEJEqkdMqjr6l0ikh1xZVIzKyDmc01szXBa2aMPpeY2adRU6mZjQmWPW9mi83sMzObbmYZx6x7jZl58Kx2zCzfzA5GbeuZeOIXOVbr9DT+YXi4dMq8FcWJDkekQYj3jOQhYL679wbmB++P4u4L3H2wuw8GLgUOAK8Hi+919zPc/XRgEzAhsp6ZtQHuAT44ZpPrIttz9zvjjF/kONefHS6dMnHWCpVOEamGeBPJaGBqMD8VGFNF/2uAWe5+AMDd9wCYmQEtgeixhJ8BjwGlccYoUiPNUlN4YGS4dMofFql0ikhV4k0kOe6+DSB4za6i/1jgxegGM5sCFAH9gElB2xCgq7v/JcY2upvZJ2a20MyGxRm/SEwjB4RLp/xirkqniFSlykRiZvPMbGmMaXRNdmRmecAgYE50u7vfDHQGVgDXm1kK8ATwjzE2sw3o5u5DgPuAF8ys7Qn2d7uZFZpZYUlJSU1CFTmqdMrzKp0iUqkqE4m7D3f3gTGmGcD2IEFEEkVlVyevA/7k7kdi7KMcmAZcDbQBBgJvmNlG4FxgppmF3P2Qu38ZrLMIWAf0OUHcz7l7yN1DWVlZVX1MkeNESqc8o9IpIpWKd2hrJjA+mB8PzKik7ziihrUsrFdkHrgKWOnuu929k7vnu3s+8D4wyt0LzSzLzFKDdXoAvYH1cX4GkRNS6RSRqsWbSCYCI8xsDTAieI+ZhcxscqSTmeUDXYGFUesaMNXMlgBLgDzgkSr2dyHwmZktBqYDd7r7zjg/g8gJ9czKYOzZKp0iUhlrCjddhUIhLywsTHQY0kAV7y3l4sff4OK+Wfz6b89KdDgi9cbMFrl7qKp+urNdpArZbVpw27Bw6ZRPVDpF5DhKJCLVcNuFkdIpK1U6ReQYSiQi1ZCRnsYPhvfmw407ma/SKSJHUSIRqaaxZ3elR6fWTJy9UqVTRKIokYhUU7PUFB4o6Mva4n0qnSISRYlEpAZGDsjlzG7teUKlU0S+pkQiUgPh0in9KVbpFJGvKZGI1FAovwOXn5bDs2+uV+kUEZRIRE7KAwX9
OHiknEkqnSKiRCJyMnplZ3D92V35vUqniCiRiJysfxjem+ZpKfz7nFWJDkUkoZRIRE5SpHTKq0u2qXSKNGlKJCJxCJdOac6js1Q6RZouJRKROIRLp/Thww0qnSJNlxKJSJwipVN+rtIpde6LXQdZvX0ve0uPe9CqJFBaogMQaegipVPu/N3HTF+0hbFDuyU6pEZlbfFeZi8tYvayIpZ+sefr9jbpaeS1b0Fuu5Z0bteCvHYtyWvfgs5Rry2bpyYw8qYjrkRiZh0IP2s9H9gIXOfuXx3T5xLgiaimfsBYd3/FzJ4HQoSflrgauMnd95nZTcDjwBfBOk+5++Rge+OBnwTt/+LuU+P5DCK1IVI65RdzVzNqcGdaNdffaCfL3Vn6xR5mL9vG7KVFrCsJf736zG7t+dEV/cht15Jtuw6ybXcpW4PX5Vv3xLw5tH2rZuS2bUHn9i3Ja/fNa167lnRu34Lcdi1IT1OyiVdcT0g0s8eAne4+0cweAjLd/cFK+ncA1gJd3P2AmbV19z3Bsl8AxcG2bgJC7j4hxvqFhJOPA4uAs45NXsfSExKlPny0cSfXPvMeP7y8DxMu7Z3ocBqU8gpn0edfMXtpEXOWFfHFroOkphjn9uhAwYBcLh+QS07bFpVu41BZOdt3H2Lr7oNs232QrbtK2bb7INt2lbJ1dylFuw/y1YHjh8Q6ZTQnN5Jc2rUg75ikk9O2Bc1Sm+ZVgOo+ITHeP5tGAxcH81OBN4ATJhLgGmCWux8AiEoiBrQknBwqMxKYG3lOu5nNBQqAF08ufJHac3ZQOuWZhesZN7QbHTPSEx1SUjtcVsF7679k9tIi5i4vYse+wzRPS+HC3p34h+G9Gd4/h8zWzau9vfS0VLp1bEW3jq1O2Ofg4fJwcok6m4kknU1fHuD99V+yt/ToYpxmkJWRTl77b4bQOrc/eigtq006qSl20seioYs3keS4+zYAd99mZtlV9B8L/CK6wcymAFcAy4F/jFp0tZldSHjI61533wycAmyO6rMlaBNJCg8U9GPkL99k0l/X8tNRAxIdTtI5eLichatLmLOsiHkrtrO3tIzWzVO5pF82BQNzubhvNhnpdTcs2LJ5Kj2yMuiRlXHCPvsOlbFt10G27i496nWUUrhIAAAMyUlEQVTb7lJWbd/LG6tKOHik/Kh1UlOMnDbpx53NRA+hdWqdTkojTTZV/sTMbB6QG2PRj2uyIzPLAwYBc6Lb3f1mM0sFJgHXA1OAPwMvuvshM7uT8NnOpYSvpRwr5lmMmd0O3A7QrZsufkr9iJRO+d37n3PTefnkd2qd6JASbk/pERasLGbWkiLeWF1M6ZEK2rdqRsGAXAoG5nJ+r060aJY81yky0tPondOG3jltYi53d/YcLDvBENpBln6xm9eXb+dw2dHf4GuemkJOu/SjhtAiZzi5QfLJbNWM8ABNwxLvNZJVwMXB2Uge8Ia79z1B3x8AA9z99hMsvwi4392/c0x7KuHrMO3MbFywvzuCZc8G+6x0aEvXSKQ+Fe8p5aLH3+DS/tk8fcOZiQ4nIXbsO8S85duZvayId9bu4Ei5k90mnYKBuRQMyGVo9w6kNeLrDu7Ozv2HjxpC2xokm8jQWtHuUsoqjv7/t0WzlPCQWYwhtEhb2xZp9ZZs6usayUxgPDAxeJ1RSd9xwMNRARrQ093XBvNXASuDZXmRITNgFLAimJ8D/JuZZQbvL4/epkgyyG7bgtsu7MGT89dw6wVfMaRbZtUrNQJbdx1kzrIiZi8t4qONO6lw6NahFTef352RA3IZ0rV9ox3aOZaZ0TEjnY4Z6Qw8pV3MPhUVzo59h2IOoW3dfZB31u6geG8px+QaWjdP/WYILeo6TfisJpxsWtfh8GAs8Z6RdAReBroBm4Br3X2nmYWAO9391qBfPvAO0NXdK4K2FOAtoC3hIavFwF3uvsfMHiWcQMqAnUF7JMn8PfCjIIR/dfcpVcWpMxKpb/sOlXHx4wvokZXBtNvP
bZDDFdWxvmQfs5cVMWdpEYu37Aagb04bRgZnHv3z2jTaz14fysorKN576KghtK1RZzXbdpdSsvf4rz23bZH29XWa07u0594RfU5q/9U9I4krkTQUSiSSCL99byP/NGMZz48PcVn/nESHUyvcneXb9jAnuEFw9fZ9AJzRtT0FA3IZOSCn0gvZUvsOl1WwfU/sIbStu0rp1qEVz3zvrJPathJJFCUSSYQj5RVc/sSbpKUYs34wrMFeE6iocD7Z/NXXd5dv3nmQFIOh3b+5x6Nz+5aJDlPqQH1dIxGRE2iWmsIDI/ty1+8/5o8fb+H6sxvOtwePlFfw4YadX98gWLz3EM1SjQt6dWLCJb0Y3j9H98nI15RIROpQwcBchgSlU646I7lLp5QeKeftNTuYHdzjsevAEVo2S+XivlkUDMzlkn7ZtG3RLNFhShJK3t9qkUbAzPjRFf259pn3+K+3NyRd6ZR9h8pYsLKY2cuKeGNlMfsPl9O2RRrD++cwcmAuF/bOUuFDqZISiUgdOzu/AyOSqHTKV/sPM3fFduYsLeKttTs4XFZBp4zmjB5yCgUDcjm3R0eapzXM6zmSGEokIvXgwQSXTinaXcrry8P3eHywYSflFc4p7VvyvXNPpWBgLmd2y2zStaIkPkokIvWgV3YG14Xqt3TK51/u//qbVp9s2vV1HHdd1JOCgbkM6NxW93hIrVAiEakn9w7vzSuffMHjr6+qk9Ip7s7q7fu+Th4rtoUfAjXwlLbcP7IvIwfk0Cs7dv0okXgokYjUk+y2LbhtWHee/Otabhu2i8Fd28e9zYoK57Mvdn/9Nd0NO/ZjBqFTM/nJlf0ZOSCXrh1OXFZdpDYokYjUo9sv6snvP9jEo6+t4KWTLJ1SVl7BRxu/Ys6ycPLYtruUtBTjWz07cuuw7ow4LYfsNpU/BEqkNimRiNSjjPQ0fjC8N/93xjL+urK42qVTDpWV8+7a4CFQK7azc/9h0tNSuKhPFveP7Mtl/XJo10r3eEhiKJGI1LNxQ7sx5Z2NTJy1kov6ZJ2wdMr+Q2UsXF3C7KVF/HVlMfsOldEmPY1L+2dTMCCXi/pmJfUNjtJ06LdQpJ5VVjpl94EjzFsRfo7Hm6tLOFRWQYfWzfnO6XmMHJjLeT07kp6mGwQluSiRiCRAdOmUb/XoxFtrw2ce7637krIKJ69dC8YN7UbBwFxCp2Y22IKP0jSo+q9Igny4YSfXPfve1+/zO7aiYGAe3x6Yy+ld2ukeD0k4Vf8VSXJDu3fg4W/3o/RIBQUDc+mTk6HkIQ1SXInEzDoA04B8YCNwnbt/dUyfS4Anopr6AWPd/RUzex4IEX5C4mrgJnffZ2Y3AY8DXwTrPOXuk4PtlQNLgvZN7j4qns8gkkh3XNQz0SGIxC3egdeHgPnu3huYH7w/irsvcPfB7j4YuBQ4ALweLL7X3c9w99MJP6p3QtSq0yLrRZJI4GBUu5KIiEiCxZtIRgNTg/mpwJgq+l8DzHL3AwDuvgfAwufzLYHGf8FGRKSRiTeR5Lj7NoDgNbuK/mOBF6MbzGwKUER4yGtS1KKrzewzM5tuZl2j2luYWaGZvW9mVSUuERGpY1UmEjObZ2ZLY0yja7IjM8sDBgFzotvd/WagM7ACuD5o/jOQHwx5zeObsx6AbsG3CG4AfmlmMQeZzez2IOEUlpSU1CRUERGpgSoTibsPd/eBMaYZwPYgQUQSRXElm7oO+JO7H4mxj3LCF+2vDt5/6e6HgsW/Ac6K6rs1eF0PvAEMOUHcz7l7yN1DWVlZVX1MERE5SfEObc0Exgfz44EZlfQdR9SwloX1iswDVwErg/d5UeuNIny2gpllmll6MN8JOB9YHudnEBGROMR7H8lE4GUzu4Xwt66uBTCzEHCnu98avM8HugILo9Y1YKqZtQ3mFwN3BcvuMbNRQBmwE7gpaO8PPGtmFYST4ER3VyIREUkg3dkuIiIxVffO9iaRSMysBPg8jk10AnbUUji1SXHV
jOKqGcVVM40xrlPdvcqLzE0ikcTLzAqrk5Xrm+KqGcVVM4qrZppyXCopKiIicVEiERGRuCiRVM9ziQ7gBBRXzSiumlFcNdNk49I1EhERiYvOSEREJC5NOpGYWYGZrTKztWZ2XAl8M7vJzErM7NNgujVq2XgzWxNM449dN4FxlUe1z6zPuII+15nZcjNbZmYvRLUn7HhVEVfCjpeZPRG179VmtitqWSJ/vyqLq86OVzVj62ZmC8zsEwsXdb0iatnDwXqrzGxkMsRlZvlmdjDqmD1Tz3Gdambzg5jeMLMuUctq73fM3ZvkBKQC64AeQHPCd9afdkyfmwg/VOvYdTsA64PXzGA+M9FxBcv2JfB49QY+iRwLIDtJjlfMuBJ9vI7pfzfwX8lwvE4UV10erxr8LJ8D7grmTwM2Rs0vBtKB7sF2UpMgrnxgaQKP1x+A8cH8pcBv6+J3rCmfkQwF1rr7enc/DLxE+Pkq1TESmOvuOz38RMi5QEESxFWXqhPXbcDTwTHB3SNFPBN9vE4UV12q6c8xuhZdoo/XieKqa9WJzYG2wXw7YGswPxp4yd0PufsGYG2wvUTHVZeqE9dphB86CLAganmt/o415URyCrA56v2WoO1YsZ6LUt116zsuqLvntVQnrj5AHzN7J9h/QQ3WTURckNjjBYSHHwj/Ff3Xmq5bz3FB3T4PqDqx/RS40cy2AK8RPmOq7rqJiAugezDktdDMhtVSTNWNazFBVXXgu0AbM+tYzXWrrSknEovRduxX2E70XJTqrJuIuKCaz2upo7jSCA8jXUz4L9nJZta+musmIi5I7PGKGAtM9/DjFGq6bk3FExfU3fGqbmzjgP929y7AFcBvzSylmusmIq5thI/ZEOA+4AULF6qtr7h+CFxkZp8AFwFfEC6GW6vHqyknki2EKxJHdOGY01E/8XNRqlw3QXHh1XxeS13EFfSZ4e5HguGFVYT/A0/o8aokrkQfr4hjnxya6ON1orjq8nhVN7ZbgJeDGN4DWhCuJZXoYxYzrmCo7cugfRHhaxp96isud9/q7n8TJLIfB227q/mZqq8uLgI1hInwX6nrCZ+6Ry5UDTimT17U/HeB9/2bC1UbCF+kygzmOyRBXJlAejDfCVhDJRdS6yCuAmBq1P43Ax2T4HidKK6EHq+gX19gI8E9Xcnw+1VJXHV2vGrws5wF3BTM9yf8n58BAzj6Yvt6au9iezxxZUXiIHxR/It6/t3vBKQE8/8KPFIXv2O18gvQUCfCp6CrCf+V8OOg7RFgVDD/KLAs+AEtAPpFrfv3hC/orQVuToa4gPOAJUH7EuCWeo7LgF8QftjYEmBskhyvmHEl+ngF739K+Lk6x66bsON1orjq+nhV82d5GvBOEMOnwOVR6/44WG8V8O1kiIvw9YnIv9WPgavqOa5rCCf81cBkgj8Eavt3THe2i4hIXJryNRIREakFSiQiIhIXJRIREYmLEomIiMRFiUREROKiRCIiInFRIhERkbgokYiISFz+P5lHvLsiUxi4AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# plot CV误差曲线\n",
    "test_means = grid_search.cv_results_[ 'mean_test_score' ]\n",
    "test_stds = grid_search.cv_results_[ 'std_test_score' ]\n",
    "train_means = grid_search.cv_results_[ 'mean_train_score' ]\n",
    "train_stds = grid_search.cv_results_[ 'std_train_score' ]\n",
    "\n",
    "x_axis = colsample_bytree_s\n",
    "\n",
    "plt.plot(x_axis, -test_means)\n",
    "#plt.errorbar(x_axis, -test_scores[:,i], yerr=test_stds[:,i] ,label = str(max_depths[i]) +' Test')\n",
    "#plt.errorbar(x_axis, -train_scores[:,i], yerr=train_stds[:,i] ,label = str(max_depths[i]) +' Train')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 减小学习率，调整n_estimators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best n_estimators: 8677\n",
      "best cv score: 0.7564434273634714\n"
     ]
    }
   ],
   "source": [
    "params = {'boosting_type':'goss',\n",
    "         'objective':'binary',\n",
    "         'n_jobs':4,\n",
    "         'learning_rate':0.01,\n",
    "          #'n_estimators':n_estimators_1,\n",
    "         'num_leaves': 800,\n",
    "         'max_depth':12,\n",
    "         'colsample_bytree':0.7,\n",
    "         'min_child_samples':40,\n",
    "         'verbosity':5}\n",
    "\n",
    "n_estimators_2 = get_n_estimators(params , x_train , y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 用所有训练数据，采用最佳参数重新训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LGBMClassifier(boosting_type='goss', class_weight=None, colsample_bytree=0.7,\n",
       "        importance_type='split', learning_rate=0.01, max_depth=12,\n",
       "        min_child_samples=40, min_child_weight=0.001, min_split_gain=0.0,\n",
       "        n_estimators=8677, n_jobs=4, num_leaves=800, objective='binary',\n",
       "        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,\n",
       "        subsample=1.0, subsample_for_bin=200000, subsample_freq=0,\n",
       "        verbosity=5)"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "params = {'boosting_type':'goss',\n",
    "         'objective':'binary',\n",
    "         'n_jobs':4,\n",
    "         'learning_rate':0.01,\n",
    "         'n_estimators':n_estimators_2,\n",
    "         'num_leaves': 800,\n",
    "         'max_depth':12,\n",
    "         'colsample_bytree':0.7,\n",
    "         'min_child_samples':40,\n",
    "         'verbosity':5}\n",
    "\n",
    "lg = LGBMClassifier(silent=False,  **params)\n",
    "lg.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 保存模型用于测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle as pk\n",
    "\n",
    "pk.dump(lg, open(\"KKbox_goss_lightGBM.pkl\", 'wb'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 特征重要性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame({\"columns\":list( x_train.columns ), \"importance\":list(lg.feature_importances_.T)})\n",
    "df = df.sort_values(by=['importance'],ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>columns</th>\n",
       "      <th>importance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>msno</td>\n",
       "      <td>1059562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>song_id</td>\n",
       "      <td>763259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>song_length</td>\n",
       "      <td>753478</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>artist_name</td>\n",
       "      <td>741379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>bd</td>\n",
       "      <td>721630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>composer</td>\n",
       "      <td>639811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>city</td>\n",
       "      <td>442859</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>genre_ids</td>\n",
       "      <td>353503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>source_screen_name</td>\n",
       "      <td>279891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>source_type</td>\n",
       "      <td>266790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>registered_via</td>\n",
       "      <td>222693</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>source_system_tab</td>\n",
       "      <td>190578</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>language</td>\n",
       "      <td>166341</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               columns  importance\n",
       "0                 msno     1059562\n",
       "1              song_id      763259\n",
       "10         song_length      753478\n",
       "2          artist_name      741379\n",
       "7                   bd      721630\n",
       "3             composer      639811\n",
       "8                 city      442859\n",
       "11           genre_ids      353503\n",
       "5   source_screen_name      279891\n",
       "6          source_type      266790\n",
       "9       registered_via      222693\n",
       "4    source_system_tab      190578\n",
       "12            language      166341"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAD8CAYAAACyyUlaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEgZJREFUeJzt3X+MZWV9x/H3p6wo2lIQRmN3SZfGjZWStuIEtzVpGreFRYzLH5JgWtlYmk0MWlub1KX9g0RTQ9OmtrSWhsiWJSVSQm3YKEo3aGOaKDKoAXFrd4IUpmxldJGSmpbSfvvHfba5jHdmmHlmOXvZ9yu5ued8z3PO8x1d9jPnx72bqkKSpB4/NHQDkqTpZ5hIkroZJpKkboaJJKmbYSJJ6maYSJK6GSaSpG6GiSSpm2EiSeq2aegGXihnn312bd26deg2JGmq3H///d+pqpnVxp00YbJ161bm5uaGbkOSpkqSf3k+47zMJUnqZphIkroZJpKkboaJJKmbYSJJ6maYSJK6GSaSpG6GiSSpm2EiSep20nwCvsfWvZ/esGM9ct2lG3YsSTpReGYiSepmmEiSuhkmkqRuhokkqZthIknqZphIkroZJpKkbquGSZJ9SZ5I8vWx2iuTHExyuL2f2epJcn2S+SQPJLlgbJ/dbfzhJLvH6m9M8mDb5/okWe8ckqRhPJ8zk5uBnUtqe4F7qmobcE9bB7gE2NZee4AbYBQMwLXAm4ALgWuPhUMbs2dsv53rmUOSNJxVw6SqvgAcXVLeBexvy/uBy8bqt9TIl4AzkrwGuBg4WFVHq+pJ4CCws207vaq+WFUF3LLkWGuZQ5I0kPXeM3l1VR0BaO+vavXNwGNj4xZabaX6woT6eub4AUn2JJlLMre4uLimH1CS9Pxt9A34TKjVOurrmeMHi1U3VtVsVc3OzMysclhJ0nqtN0y+fezSUnt/otUXgHPGxm0BHl+lvmVCfT1zSJIGst4wOQAceyJrN3DnWP3K9sTVduCpdonqbuCiJGe2G+8XAXe3bU8n2d6e4rpyybHWMockaSCrfgV9kk8AvwicnWSB0VNZ1wG3J7kKeBS4vA2/C3grMA98H3g3QFUdTfJh4L427kNVdeym/nsYPTF2GvCZ9mKtc0iShrNqmFTVO5fZtGPC2AKuXuY4+4B9E+pzwPkT6t9d6xySpGH4CXhJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1M0wkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1M0wkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1K0rTJL8VpKHknw9ySeSvCzJuUnuTXI4yd8kObWNfWlbn2/bt44d55pW/2aSi8fqO1ttPsnesfrEOSRJw1h3mCTZDPwGMFtV5wOnAFcAfwB8tKq2AU8CV7VdrgKerKrXAh9t40hyXtvvp4CdwF8kOSXJKcDHgEuA84B3trGsMIckaQC9l7k2Aacl2QS8HDgCvAW4o23fD1zWlne1ddr2HUnS6rdV1X9V1beAeeDC9pqvqoer6hngNmBX22e5OSRJA1h3mFTVvwJ/BDzKKESeAu4HvldVz7ZhC8DmtrwZeKzt+2wbf9Z4fck+y9XPWmGO50iyJ8lckrnFxcX1/qiSpFX0XOY6k9FZxbnAjwGvYHRJaqk6tssy2zaq/oPFqhuraraqZmdmZiYNkSRtgJ7LXL8EfKuqFqvqv4FPAj8PnNEuewFsAR5vywvAOQBt+48CR8frS/ZZrv6dFeaQJA2gJ0weBbYneXm7j7ED+AbweeAdbcxu4M62fKCt07Z/rqqq1a9oT3udC2wDvgzcB2xrT26dyugm/YG2z3JzSJIG0HPP5F5GN8G/AjzYjnUj8EHgA0nmGd3f
uKntchNwVqt/ANjbjvMQcDujIPoscHVV/U+7J/Je4G7gEHB7G8sKc0iSBpDRL/ovfrOzszU3N7eufbfu/fSG9fHIdZdu2LEk6XhLcn9Vza42zk/AS5K6GSaSpG6bVh+i483LaJKmnWcmkqRunplIelHyjP+F5ZmJJKmbZyYnAX9Dk3S8eWYiSepmmEiSuhkmkqRu3jORppj3w3Si8MxEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1M0wkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEnd/PdM1M1/U0OSZyaSpG6GiSSpm2EiSerWFSZJzkhyR5J/SnIoyc8leWWSg0kOt/cz29gkuT7JfJIHklwwdpzdbfzhJLvH6m9M8mDb5/okafWJc0iShtF7ZvKnwGer6ieBnwEOAXuBe6pqG3BPWwe4BNjWXnuAG2AUDMC1wJuAC4Frx8Lhhjb22H47W325OSRJA1h3mCQ5HfgF4CaAqnqmqr4H7AL2t2H7gcva8i7glhr5EnBGktcAFwMHq+poVT0JHAR2tm2nV9UXq6qAW5Yca9IckqQB9JyZ/ASwCPxVkq8m+XiSVwCvrqojAO39VW38ZuCxsf0XWm2l+sKEOivMIUkaQE+YbAIuAG6oqjcA/8HKl5syoVbrqD9vSfYkmUsyt7i4uJZdJUlr0BMmC8BCVd3b1u9gFC7fbpeoaO9PjI0/Z2z/LcDjq9S3TKizwhzPUVU3VtVsVc3OzMys64eUJK1u3WFSVf8GPJbkda20A/gGcAA49kTWbuDOtnwAuLI91bUdeKpdorobuCjJme3G+0XA3W3b00m2t6e4rlxyrElzSJIG0Pt1Ku8Dbk1yKvAw8G5GAXV7kquAR4HL29i7gLcC88D321iq6miSDwP3tXEfqqqjbfk9wM3AacBn2gvgumXmkCQNoCtMquprwOyETTsmjC3g6mWOsw/YN6E+B5w/of7dSXNIkobhJ+AlSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1M0wkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1M0wkSR1M0wkSd02Dd2ApBPX1r2f3rBjPXLdpRt2LJ14PDORJHUzTCRJ3QwTSVI3w0SS1K07TJKckuSrST7V1s9Ncm+Sw0n+Jsmprf7Stj7ftm8dO8Y1rf7NJBeP1Xe22nySvWP1iXNIkoaxEWcm7wcOja3/AfDRqtoGPAlc1epXAU9W1WuBj7ZxJDkPuAL4KWAn8BctoE4BPgZcApwHvLONXWkOSdIAusIkyRbgUuDjbT3AW4A72pD9wGVteVdbp23f0cbvAm6rqv+qqm8B88CF7TVfVQ9X1TPAbcCuVeaQJA2g98zkT4DfAf63rZ8FfK+qnm3rC8DmtrwZeAygbX+qjf//+pJ9lquvNIckaQDrDpMkbwOeqKr7x8sThtYq2zaqPqnHPUnmkswtLi5OGiJJ2gA9ZyZvBt6e5BFGl6DewuhM5Ywkxz5ZvwV4vC0vAOcAtO0/Chwdry/ZZ7n6d1aY4zmq6saqmq2q2ZmZmfX/pJKkFa07TKrqmqraUlVbGd1A/1xV/QrweeAdbdhu4M62fKCt07Z/rqqq1a9oT3udC2wDvgzcB2xrT26d2uY40PZZbg5J0gCOx+dMPgh8IMk8o/sbN7X6TcBZrf4BYC9AVT0E3A58A/gscHVV/U+7J/Je4G5GT4vd3sauNIckaQAb8kWPVfUPwD+05YcZPYm1dMx/Apcvs//vA78/oX4XcNeE+sQ5JEnD8BPwkqRuhokkqZthIknqZphIkrr5Ly1K0jr4r1A+l2cmkqRuhokkqZthIknqZphIkroZJpKkboaJJKmbYSJJ6maYSJK6GSaSpG6GiSSpm2EiSepmmEiSuhkmkqRufmuwTmp+86u0MTwz
kSR1M0wkSd28zCUdR15G08nCMxNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1M0wkSR183MmkgbhZ3BeXNZ9ZpLknCSfT3IoyUNJ3t/qr0xyMMnh9n5mqyfJ9UnmkzyQ5IKxY+1u4w8n2T1Wf2OSB9s+1yfJSnNIkobRc5nrWeC3q+r1wHbg6iTnAXuBe6pqG3BPWwe4BNjWXnuAG2AUDMC1wJuAC4Frx8Lhhjb22H47W325OSRJA1h3mFTVkar6Slt+GjgEbAZ2AfvbsP3AZW15F3BLjXwJOCPJa4CLgYNVdbSqngQOAjvbttOr6otVVcAtS441aQ5J0gA25AZ8kq3AG4B7gVdX1REYBQ7wqjZsM/DY2G4LrbZSfWFCnRXmkCQNoDtMkvww8LfAb1bVv680dEKt1lFfS297kswlmVtcXFzLrpKkNegKkyQvYRQkt1bVJ1v52+0SFe39iVZfAM4Z230L8Pgq9S0T6ivN8RxVdWNVzVbV7MzMzPp+SEnSqtb9aHB7suom4FBV/fHYpgPAbuC69n7nWP29SW5jdLP9qao6kuRu4CNjN90vAq6pqqNJnk6yndHlsyuBP1tlDr3I+PioNB16PmfyZuBdwINJvtZqv8voL/jbk1wFPApc3rbdBbwVmAe+D7wboIXGh4H72rgPVdXRtvwe4GbgNOAz7cUKc0iSBrDuMKmqf2TyfQ2AHRPGF3D1MsfaB+ybUJ8Dzp9Q/+6kOSRJw/DrVCRJ3fw6FUk6AU3b/ULPTCRJ3QwTSVI3w0SS1M0wkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1M0wkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1M0wkSR1m9owSbIzyTeTzCfZO3Q/knQym8owSXIK8DHgEuA84J1Jzhu2K0k6eU1lmAAXAvNV9XBVPQPcBuwauCdJOmlNa5hsBh4bW19oNUnSAFJVQ/ewZkkuBy6uql9v6+8CLqyq9y0ZtwfY01ZfB3zzOLd2NvCd4zzH8TLNvcN09z/NvYP9D+mF6P3Hq2pmtUGbjnMTx8sCcM7Y+hbg8aWDqupG4MYXqqkkc1U1+0LNt5GmuXeY7v6nuXew/yGdSL1P62Wu+4BtSc5NcipwBXBg4J4k6aQ1lWcmVfVskvcCdwOnAPuq6qGB25Kkk9ZUhglAVd0F3DV0H0u8YJfUjoNp7h2mu/9p7h3sf0gnTO9TeQNeknRimdZ7JpKkE4hhsgGm+atdkpyT5PNJDiV5KMn7h+5prZKckuSrST41dC9rleSMJHck+af2/8HPDd3TWiT5rfbn5utJPpHkZUP3tJwk+5I8keTrY7VXJjmY5HB7P3PIHleyTP9/2P7sPJDk75KcMVR/hkmnF8FXuzwL/HZVvR7YDlw9Zf0DvB84NHQT6/SnwGer6ieBn2GKfo4km4HfAGar6nxGD8NcMWxXK7oZ2Lmkthe4p6q2Afe09RPVzfxg/weB86vqp4F/Bq55oZs6xjDpN9Vf7VJVR6rqK235aUZ/mU3Ntwkk2QJcCnx86F7WKsnpwC8ANwFU1TNV9b1hu1qzTcBpSTYBL2fC571OFFX1BeDokvIuYH9b3g9c9oI2tQaT+q+qv6+qZ9vqlxh95m4Qhkm/F81XuyTZCrwBuHfYTtbkT4DfAf536EbW4SeAReCv2mW6jyd5xdBNPV9V9a/AHwGPAkeAp6rq74ftas1eXVVHYPSLFfCqgfvp8WvAZ4aa3DDplwm1qXtELskPA38L/GZV/fvQ/TwfSd4GPFFV9w/dyzptAi4AbqiqNwD/wYl9meU52v2FXcC5wI8Br0jyq8N2dXJK8nuMLlnfOlQPhkm/5/XVLieyJC9hFCS3VtUnh+5nDd4MvD3JI4wuL74lyV8P29KaLAALVXXsTPAORuEyLX4J+FZVLVbV
fwOfBH5+4J7W6ttJXgPQ3p8YuJ81S7IbeBvwKzXgZz0Mk35T/dUuScLomv2hqvrjoftZi6q6pqq2VNVWRv+7f66qpuY346r6N+CxJK9rpR3ANwZsaa0eBbYneXn7c7SDKXqAoDkA7G7Lu4E7B+xlzZLsBD4IvL2qvj9kL4ZJp3bz69hXuxwCbp+yr3Z5M/AuRr/Vf6293jp0UyeR9wG3JnkA+FngIwP387y1M6o7gK8ADzL6++SE+UT2Ukk+AXwReF2ShSRXAdcBv5zkMPDLbf2EtEz/fw78CHCw/bf7l4P15yfgJUm9PDORJHUzTCRJ3QwTSVI3w0SS1M0wkSR1M0wkSd0ME0lSN8NEktTt/wC2Q7ba1iYOjAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.bar(range(len(lg.feature_importances_)), lg.feature_importances_)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 对测试集的数据进行预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "test=data[data['type']=='test']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>msno</th>\n",
       "      <th>song_id</th>\n",
       "      <th>artist_name</th>\n",
       "      <th>composer</th>\n",
       "      <th>id</th>\n",
       "      <th>source_system_tab</th>\n",
       "      <th>source_screen_name</th>\n",
       "      <th>source_type</th>\n",
       "      <th>target</th>\n",
       "      <th>type</th>\n",
       "      <th>bd</th>\n",
       "      <th>city</th>\n",
       "      <th>registered_via</th>\n",
       "      <th>song_length</th>\n",
       "      <th>genre_ids</th>\n",
       "      <th>language</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5511</th>\n",
       "      <td>145843</td>\n",
       "      <td>517032</td>\n",
       "      <td>238157</td>\n",
       "      <td>444941</td>\n",
       "      <td>64</td>\n",
       "      <td>35</td>\n",
       "      <td>16</td>\n",
       "      <td>35</td>\n",
       "      <td>NaN</td>\n",
       "      <td>test</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>222693.0</td>\n",
       "      <td>1259.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5512</th>\n",
       "      <td>145843</td>\n",
       "      <td>517988</td>\n",
       "      <td>296354</td>\n",
       "      <td>213553</td>\n",
       "      <td>401</td>\n",
       "      <td>35</td>\n",
       "      <td>16</td>\n",
       "      <td>35</td>\n",
       "      <td>NaN</td>\n",
       "      <td>test</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>257277.0</td>\n",
       "      <td>1011.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5513</th>\n",
       "      <td>145843</td>\n",
       "      <td>89014</td>\n",
       "      <td>159111</td>\n",
       "      <td>263612</td>\n",
       "      <td>402</td>\n",
       "      <td>35</td>\n",
       "      <td>16</td>\n",
       "      <td>35</td>\n",
       "      <td>NaN</td>\n",
       "      <td>test</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>346853.0</td>\n",
       "      <td>465.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5514</th>\n",
       "      <td>145843</td>\n",
       "      <td>32378</td>\n",
       "      <td>141974</td>\n",
       "      <td>141974</td>\n",
       "      <td>1076</td>\n",
       "      <td>35</td>\n",
       "      <td>16</td>\n",
       "      <td>35</td>\n",
       "      <td>NaN</td>\n",
       "      <td>test</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>179258.0</td>\n",
       "      <td>1011.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5515</th>\n",
       "      <td>145843</td>\n",
       "      <td>60498</td>\n",
       "      <td>187339</td>\n",
       "      <td>108538</td>\n",
       "      <td>1077</td>\n",
       "      <td>35</td>\n",
       "      <td>16</td>\n",
       "      <td>35</td>\n",
       "      <td>NaN</td>\n",
       "      <td>test</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>194359.0</td>\n",
       "      <td>1011.0</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        msno  song_id  artist_name  composer    id  source_system_tab  \\\n",
       "5511  145843   517032       238157    444941    64                 35   \n",
       "5512  145843   517988       296354    213553   401                 35   \n",
       "5513  145843    89014       159111    263612   402                 35   \n",
       "5514  145843    32378       141974    141974  1076                 35   \n",
       "5515  145843    60498       187339    108538  1077                 35   \n",
       "\n",
       "      source_screen_name  source_type  target  type  bd  city  registered_via  \\\n",
       "5511                  16           35     NaN  test NaN     1               7   \n",
       "5512                  16           35     NaN  test NaN     1               7   \n",
       "5513                  16           35     NaN  test NaN     1               7   \n",
       "5514                  16           35     NaN  test NaN     1               7   \n",
       "5515                  16           35     NaN  test NaN     1               7   \n",
       "\n",
       "      song_length  genre_ids  language  \n",
       "5511     222693.0     1259.0      52.0  \n",
       "5512     257277.0     1011.0      52.0  \n",
       "5513     346853.0      465.0      52.0  \n",
       "5514     179258.0     1011.0      52.0  \n",
       "5515     194359.0     1011.0      52.0  "
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_test=test.drop(['target','type','id'],axis=1)\n",
    "X_id=test['id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = pk.load(open(\"KKbox_goss_lightGBM.pkl\", 'rb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "#输出每类的概率\n",
    "y_test_pred = model.predict_proba(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2556790, 2)"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test_pred.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.374942</td>\n",
       "      <td>0.625058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.469372</td>\n",
       "      <td>0.530628</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.451617</td>\n",
       "      <td>0.548383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.349475</td>\n",
       "      <td>0.650525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.399604</td>\n",
       "      <td>0.600396</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1\n",
       "0  0.374942  0.625058\n",
       "1  0.469372  0.530628\n",
       "2  0.451617  0.548383\n",
       "3  0.349475  0.650525\n",
       "4  0.399604  0.600396"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#生成提交结果\n",
    "out_df = pd.DataFrame(y_test_pred)\n",
    "out_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "columns = np.empty(2, dtype=object)\n",
    "for i in range(2):\n",
    "    columns[i] = 'Class_' + str(i+1)\n",
    "\n",
    "out_df.columns = columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Class_1</th>\n",
       "      <th>Class_2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.374942</td>\n",
       "      <td>0.625058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.469372</td>\n",
       "      <td>0.530628</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.451617</td>\n",
       "      <td>0.548383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.349475</td>\n",
       "      <td>0.650525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.399604</td>\n",
       "      <td>0.600396</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.508575</td>\n",
       "      <td>0.491425</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.436159</td>\n",
       "      <td>0.563841</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.279827</td>\n",
       "      <td>0.720173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.436711</td>\n",
       "      <td>0.563289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.572692</td>\n",
       "      <td>0.427308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0.380557</td>\n",
       "      <td>0.619443</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0.434640</td>\n",
       "      <td>0.565360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>0.558658</td>\n",
       "      <td>0.441342</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>0.516881</td>\n",
       "      <td>0.483119</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0.605895</td>\n",
       "      <td>0.394105</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.700738</td>\n",
       "      <td>0.299262</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>0.502194</td>\n",
       "      <td>0.497806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>0.488542</td>\n",
       "      <td>0.511458</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>0.522200</td>\n",
       "      <td>0.477800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>0.490656</td>\n",
       "      <td>0.509344</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Class_1   Class_2\n",
       "0   0.374942  0.625058\n",
       "1   0.469372  0.530628\n",
       "2   0.451617  0.548383\n",
       "3   0.349475  0.650525\n",
       "4   0.399604  0.600396\n",
       "5   0.508575  0.491425\n",
       "6   0.436159  0.563841\n",
       "7   0.279827  0.720173\n",
       "8   0.436711  0.563289\n",
       "9   0.572692  0.427308\n",
       "10  0.380557  0.619443\n",
       "11  0.434640  0.565360\n",
       "12  0.558658  0.441342\n",
       "13  0.516881  0.483119\n",
       "14  0.605895  0.394105\n",
       "15  0.700738  0.299262\n",
       "16  0.502194  0.497806\n",
       "17  0.488542  0.511458\n",
       "18  0.522200  0.477800\n",
       "19  0.490656  0.509344"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df .head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "out_df_target=out_df['Class_2']\n",
    "out_df_target.columns='target'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "#因为一开始是train和test合并，所以这边id的索引要重置从0开始\n",
    "X_id=X_id.reset_index()\n",
    "X_id=X_id['id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>Class_2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>64</td>\n",
       "      <td>0.625058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>401</td>\n",
       "      <td>0.530628</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>402</td>\n",
       "      <td>0.548383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1076</td>\n",
       "      <td>0.650525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1077</td>\n",
       "      <td>0.600396</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     id   Class_2\n",
       "0    64  0.625058\n",
       "1   401  0.530628\n",
       "2   402  0.548383\n",
       "3  1076  0.650525\n",
       "4  1077  0.600396"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df_end = pd.concat([X_id,out_df_target], axis = 1)\n",
    "out_df_end.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "out_df_end.to_csv(path+\"sampleSubmission.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2556790, 2)"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "最后取预测为1的那一列值，然后查看最后的大小，和test一开始的行数是一致的"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. mnso和song_id不应该加入模型训练？\n",
    "老师解答：应该加入\n",
    "### 2. 最后的结果是概率还是0和1？\n",
    "老师解答：是概率\n",
    "### 3. 特征和结果的相关性是否是强相关性（即绝对性）：比如这次我用10个特征训练模型，最后特征1的相关性最高。下次我用11个特征，不管新加的这个特征相关性高不高，原来的这个特征1的相关性是否和原来一样？如果是相对的，那么在每次不同特征个数的模型中，同一个特征的相关性在该模型中的相关占比是不是一样？\n",
    "老师解答：不一样\n",
    "### 4. 如何判断lightGBM的调优结果是靠谱的？\n",
    "老师解答：预测结果上传到kaggle，查看分数\n",
    "\n",
    "## 我们试着将模型的特征都单独保存，这样训练的时候可以做到按需加载拼装，增加灵活性\n",
    "## 因为提取特征非常的费时，所以后续工作是逐步加入更多维度的特征进行训练"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
